path: root/helix-core/src/syntax.rs
blob: 32974e11fe24a67562829ef4f92921cfdfc1557a (plain) (tree)



































































use crate::{Change, Rope, RopeSlice, Transaction};
pub use helix_syntax::LANG;
pub use helix_syntax::{get_language, get_language_name};

use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use once_cell::sync::OnceCell;

// largely based on tree-sitter/cli/src/loader.rs
/// Static description of one supported language: how its files are recognized
/// and what is needed to highlight them.
pub struct LanguageConfiguration {
    /// Scope name identifying the language, e.g. `source.rust`.
    pub(crate) scope: String,           // source.rust
    /// File types registered for this language; `Loader` looks these up
    /// against both the file name and the extension.
    pub(crate) file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>

    /// Path associated with the language; the built-in entries point at a
    /// `tree-sitter-<lang>` directory (presumably the grammar sources — confirm).
    pub(crate) path: PathBuf,

    // content_regex
    // injection_regex
    // first_line_regex
    // root_path
    /// Identifier passed to `get_language` to obtain the tree-sitter grammar.
    pub(crate) language_id: LANG,
    /// Highlight configuration, initialized on first request via
    /// `get_or_try_init` and kept in the cell afterwards.
    pub(crate) highlight_config: OnceCell<Option<Arc<HighlightConfiguration>>>,
    // tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583

impl LanguageConfiguration {
    /// Returns the highlight configuration for this language, building it the
    /// first time it is requested (the result lives in a `OnceCell`).
    ///
    /// `scopes` is presumably the list of recognized highlight names handed to
    /// `HighlightConfiguration::configure` — confirm against the full body.
    pub fn highlight_config(
        scopes: &[String],
    ) -> Result<Option<&Arc<HighlightConfiguration>>, anyhow::Error> {
            .get_or_try_init(|| {
                // let name = get_language_name(&self.language_id);

                let highlights_query =

                let injections_query =

                // No locals query is supplied, so the locals section of the
                // combined query is empty.
                let locals_query = "";

                // An empty highlights query takes the first branch; otherwise a
                // configuration is built for this language's grammar.
                if highlights_query.is_empty() {
                } else {
                    let language = get_language(self.language_id);
                    let mut config = HighlightConfiguration::new(
                    .unwrap(); // TODO: no unwrap

    pub fn scope(&self) -> &str {

use once_cell::sync::Lazy;

/// Global language loader, constructed by `Loader::init` on first access.
pub static LOADER: Lazy<Loader> = Lazy::new(Loader::init);

/// Registry of the known language configurations and the file types that map to them.
pub struct Loader {
    // highlight_names ?
    /// Registered configurations; lookups index into this vector by id.
    language_configs: Vec<Arc<LanguageConfiguration>>,
    /// Maps a file type to the index of a configuration in `language_configs`.
    /// Only one id per file type is stored for now.
    language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize>

impl Loader {
    /// Builds the loader from the built-in configuration list (Rust and TOML)
    /// and records an id for each file type of each configuration.
    fn init() -> Loader {
        let mut loader = Loader {
            language_configs: Vec::new(),
            language_config_ids_by_file_type: HashMap::new(),

        // hardcoded for now, might load from toml
        let configs = vec![
            LanguageConfiguration {
                scope: "source.rust".to_string(),
                file_types: vec!["rs".to_string()],
                language_id: LANG::Rust,
                highlight_config: OnceCell::new(),
                path: "../helix-syntax/languages/tree-sitter-rust".into(),
            LanguageConfiguration {
                scope: "source.toml".to_string(),
                file_types: vec!["toml".to_string()],
                language_id: LANG::Toml,
                highlight_config: OnceCell::new(),
                path: "../helix-syntax/languages/tree-sitter-toml".into(),

        for config in configs {
            // get the next id: the current length of `language_configs`
            // (presumably the slot this config is about to occupy — confirm)
            let language_id = loader.language_configs.len();

            for file_type in &config.file_types {
                // A single id is stored per file type; the commented-out line
                // sketches a multi-id variant.
                // entry().or_insert(Vec::new).push(language_id);
                    .insert(file_type.clone(), language_id);



    /// Looks up the language configuration registered for `path`.
    ///
    /// The file name is tried as a key first; if that finds nothing, the
    /// extension is tried. Returns `None` when neither is registered or when
    /// the relevant path component is not valid UTF-8 (`to_str` fails).
    pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> {
        // Find all the language configurations that match this file name
        // or a suffix of the file name.
        let configuration_id = path
            .and_then(|n| n.to_str())
            .and_then(|file_name| self.language_config_ids_by_file_type.get(file_name))
            .or_else(|| {
                    .and_then(|extension| extension.to_str())
                    .and_then(|extension| self.language_config_ids_by_file_type.get(extension))

        // Resolve the id to its configuration; cloning only clones the `Arc`.
        configuration_id.and_then(|&id| self.language_configs.get(id).cloned())

        // TODO: content_regex handling conflict resolution

    /// Returns a configuration whose `scope` equals the given `scope`, if any.
    pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> {
            .find(|config| config.scope == scope)


/// Syntax state for one document: the parser, the highlight configuration in
/// use and the root parse layer.
pub struct Syntax {
    // grammar: Grammar,
    /// Parser handed to the root layer for the initial parse and for updates.
    parser: Parser,
    /// Pool of query cursors; `HighlightIterLayer::new` pops from it.
    cursors: Vec<QueryCursor>,

    /// Highlight configuration; its `query` is what `highlight_iter` runs.
    config: Arc<HighlightConfiguration>,

    /// Layer holding the parse tree for the document.
    pub(crate) root_layer: LanguageLayer,

impl Syntax {
    // buffer, grammar, config, grammars, sync_timeout?
    /// Creates the syntax state for `source` using `config`.
    ///
    /// Starts from a fresh parser, an empty cursor pool and a root layer with
    /// no tree, then has the root layer processed over an unrestricted range.
    pub fn new(
        /*language: LANG,*/ source: &Rope,
        config: Arc<HighlightConfiguration>,
    ) -> Self {
        // fetch grammar for parser based on language string
        // let grammar = get_language(&language);
        let parser = Parser::new();

        let root_layer = LanguageLayer { tree: None };

        // track markers of injections
        // track scope_descriptor: a Vec of scopes for item in tree

        let mut syntax = Self {
            // grammar,
            cursors: Vec::new(),

        // update root layer
            &mut syntax.parser,
            // One range spanning the maximum byte/point extent, i.e. no
            // restriction on what the parser may include.
            vec![Range {
                start_byte: 0,
                end_byte: usize::MAX,
                start_point: Point::new(0, 0),
                end_point: Point::new(usize::MAX, usize::MAX),

    /// Brings the syntax state up to date after `changeset` turned
    /// `old_source` into `source`.
    ///
    /// Uses this struct's parser (presumably by delegating to the root
    /// layer's `update` — confirm). Injections are not updated yet.
    pub fn update(
        &mut self,
        old_source: &Rope,
        source: &Rope,
        changeset: &ChangeSet,
    ) -> Result<(), Error> {
            &mut self.parser,

        // TODO: deal with injections and update them too

    // fn buffer_changed -> call layer.update(range, new_text) on root layer and then all marker layers

    // call this on transaction.apply() -> buffer_changed(changes)
    // fn parse(language, old_tree, ranges)
    fn tree(&self) -> &Tree {
    // <!--update_for_injection(grammar)-->

    // Highlighting

    /// Iterate over the highlighted regions for a given slice of source code.
    ///
    /// * `source` - the document text as bytes.
    /// * `range` - optional byte range; when present it restricts the query
    ///   cursor and its start becomes the iterator's initial byte offset.
    /// * `cancellation_flag` - optional flag stored on the iterator
    ///   (presumably checked while iterating — confirm).
    /// * `injection_callback` - maps a string (presumably an injected
    ///   language's name) to the configuration to highlight it with, or `None`.
    ///
    /// The root layer is built from the already-parsed tree rather than by
    /// parsing again.
    pub fn highlight_iter<'a>(
        &'a mut self,
        source: &'a [u8],
        range: Option<std::ops::Range<usize>>,
        cancellation_flag: Option<&'a AtomicUsize>,
        mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
    ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
        // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
        // prevents them from being moved. But both of these values are really just
        // pointers, so it's actually ok to move them.

        // NOTE(review): each transmute below extends a borrow to `'static`.
        // `cursor` is a local, so the reference is only meaningful if the cursor
        // is kept alive alongside the captures (the layer has a `cursor` field
        // for this) — confirm it is moved there.
        let mut cursor = QueryCursor::new(); // reuse a pool
        let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(self.tree()) };
        let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
        let query_ref = unsafe { mem::transmute::<_, &'static Query>(&self.config.query) };
        let config_ref =
            unsafe { mem::transmute::<_, &'static HighlightConfiguration>(self.config.as_ref()) };

        // TODO: if reusing cursors this might need resetting
        if let Some(range) = &range {
            cursor_ref.set_byte_range(range.start, range.end);

        let captures = cursor_ref
            .captures(query_ref, tree_ref.root_node(), move |n: Node| {

        // manually craft the root layer based on the existing tree
        let layer = HighlightIterLayer {
            highlight_end_stack: Vec::new(),
            // A single, non-inheriting scope covering the whole byte range.
            scope_stack: vec![LocalScope {
                inherits: false,
                range: 0..usize::MAX,
                local_defs: Vec::new(),
            depth: 0,
            // The tree is borrowed from `self` (see `tree_ref`), not owned by the layer.
            _tree: None,
            config: config_ref,
            ranges: vec![Range {
                start_byte: 0,
                end_byte: usize::MAX,
                start_point: Point::new(0, 0),
                end_point: Point::new(usize::MAX, usize::MAX),

        let mut result = HighlightIter {
            // Start at the beginning of the requested range, or at 0 without one.
            byte_offset: range.map(|r| r.start).unwrap_or(0), // TODO: simplify
            highlighter: self,
            iter_count: 0,
            layers: vec![layer],
            next_event: None,
            last_highlight_range: None,
    // on_tokenize
    // on_change_highlighting

    // Commenting
    // comment_strings_for_pos
    // is_commented

    // Indentation
    // suggested_indent_for_line_at_buffer_row
    // suggested_indent_for_buffer_row
    // indent_level_for_line

    // TODO: Folding

    // Syntax APIs
    // get_syntax_node_containing_range ->
    // ...
    // get_syntax_node_at_pos
    // buffer_range_for_scope_at_pos

/// One parsed layer of a document; currently it only holds the parse tree.
pub struct LanguageLayer {
    // mode
    // grammar
    // depth
    /// The most recent parse tree; `None` until `parse` has stored one.
    pub(crate) tree: Option<Tree>,

use crate::state::coords_at_pos;
use crate::transaction::{ChangeSet, Operation};
use crate::Tendril;

impl LanguageLayer {
    // pub fn new() -> Self {
    //     Self { tree: None }
    // }

    fn tree(&self) -> &Tree {
        // TODO: no unwrap

    /// Parses `source` with `parser`, restricted to `ranges`, and stores the
    /// resulting tree in `self.tree`.
    ///
    /// The parse only happens when `set_included_ranges` accepts `ranges`.
    /// A failure in the step that follows it is reported as
    /// `Error::InvalidLanguage`.
    /// Combined-injection handling is present only as commented-out code.
    fn parse(
        &mut self,
        parser: &mut Parser,
        config: &HighlightConfiguration,
        source: &Rope,
        mut depth: usize,
        mut ranges: Vec<Range>,
    ) -> Result<(), Error> {
        if parser.set_included_ranges(&ranges).is_ok() {
                .map_err(|_| Error::InvalidLanguage)?;

            // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
            let tree = parser
                    // Text callback: for a byte offset inside the rope, return the
                    // rest of the chunk containing it, starting at that byte.
                    &mut |byte, _| {
                        if byte <= source.len_bytes() {
                            let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
                            chunk[byte - start_byte..].as_bytes()
                        } else {
                            // out of range
            // unsafe { syntax.parser.set_cancellation_flag(None) };
            // let mut cursor = syntax.cursors.pop().unwrap_or_else(QueryCursor::new);

            // Process combined injections. (ERB, EJS, etc https://github.com/tree-sitter/tree-sitter/pull/526)
            // if let Some(combined_injections_query) = &config.combined_injections_query {
            //     let mut injections_by_pattern_index =
            //         vec![(None, Vec::new(), false); combined_injections_query.pattern_count()];
            //     let matches =
            //         cursor.matches(combined_injections_query, tree.root_node(), |n: Node| {
            //             &source[n.byte_range()]
            //         });
            //     for mat in matches {
            //         let entry = &mut injections_by_pattern_index[mat.pattern_index];
            //         let (language_name, content_node, include_children) =
            //             injection_for_match(config, combined_injections_query, &mat, source);
            //         if language_name.is_some() {
            //             entry.0 = language_name;
            //         }
            //         if let Some(content_node) = content_node {
            //             entry.1.push(content_node);
            //         }
            //         entry.2 = include_children;
            //     }
            //     for (lang_name, content_nodes, includes_children) in injections_by_pattern_index {
            //         if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
            //             if let Some(next_config) = (injection_callback)(lang_name) {
            //                 let ranges =
            //                     Self::intersect_ranges(&ranges, &content_nodes, includes_children);
            //                 if !ranges.is_empty() {
            //                     queue.push((next_config, depth + 1, ranges));
            //                 }
            //             }
            //         }
            //     }
            // }
            // Replace whatever tree was stored before with the new one.
            self.tree = Some(tree)

    /// Translates `changeset` into a list of `tree_sitter::InputEdit`s.
    ///
    /// The operations are walked in order while tracking a char position in
    /// the old text. `Retain` produces no edit, `Delete` produces a deletion,
    /// and `Insert` produces an insertion, or a replacement when the next
    /// operation is a `Delete`.
    ///
    /// NOTE(review): every byte offset and point is resolved against
    /// `old_text`; confirm this is what the consumer expects when several
    /// edits are applied in sequence.
    pub(crate) fn generate_edits(
        old_text: RopeSlice,
        changeset: &ChangeSet,
    ) -> Vec<tree_sitter::InputEdit> {
        use Operation::*;
        // Char positions in the old and new text. `new_pos` is advanced below
        // but not otherwise read in the visible code.
        let mut old_pos = 0;
        let mut new_pos = 0;

        let mut edits = Vec::new();

        // Peekable so an `Insert` can look ahead for a following `Delete`.
        let mut iter = changeset.changes.iter().peekable();

        // TODO: this is a lot easier with Change instead of Operation.

        // Converts a char index into (byte offset, point), where the point's
        // column is the byte distance from the start of the line.
        fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) {
            let byte = text.char_to_byte(pos); // <- attempted to index past end
            let line = text.char_to_line(pos);
            let line_start_byte = text.line_to_byte(line);
            let col = byte - line_start_byte;

            (byte, Point::new(line, col))

        // Advances `point` over `text` byte by byte: a newline moves to the
        // start of the next row, any other byte moves one column right.
        fn traverse(point: Point, text: &Tendril) -> Point {
            let Point {
                mut row,
                mut column,
            } = point;

            // TODO: there should be a better way here
            for ch in text.bytes() {
                if ch == b'\n' {
                    row += 1;
                    column = 0;
                } else {
                    column += 1;
            Point { row, column }

        while let Some(change) = iter.next() {
            // Number of old-text chars this operation covers; insertions cover none.
            let len = match change {
                Delete(i) | Retain(i) => *i,
                Insert(_) => 0,
            let mut old_end = old_pos + len;

            match change {
                Retain(_) => {
                    new_pos += len;
                Delete(_) => {
                    let (start_byte, start_position) = point_at_pos(old_text, old_pos);
                    let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end);

                    // TODO: Position also needs to be byte based...
                    // let byte = char_to_byte(old_pos)
                    // let line = char_to_line(old_pos)
                    // let line_start_byte = line_to_byte()
                    // Position::new(line, line_start_byte - byte)

                    // deletion
                    edits.push(tree_sitter::InputEdit {
                        start_byte,                       // old_pos to byte
                        old_end_byte,                     // old_end to byte
                        new_end_byte: start_byte,         // old_pos to byte
                        start_position,                   // old pos to coords
                        old_end_position,                 // old_end to coords
                        new_end_position: start_position, // old pos to coords
                Insert(s) => {
                    let (start_byte, start_position) = point_at_pos(old_text, old_pos);

                    // Inserted length in chars (byte length is `s.len()`).
                    let ins = s.chars().count();

                    // a subsequent delete means a replace, consume it
                    if let Some(Delete(len)) = iter.peek() {
                        old_end = old_pos + len;
                        let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end);


                        // replacement
                        edits.push(tree_sitter::InputEdit {
                            start_byte,                                    // old_pos to byte
                            old_end_byte,                                  // old_end to byte
                            new_end_byte: start_byte + s.len(), // old_pos to byte + s.len()
                            start_position,                     // old pos to coords
                            old_end_position,                   // old_end to coords
                            new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over)
                    } else {
                        // insert
                        edits.push(tree_sitter::InputEdit {
                            start_byte,                                    // old_pos to byte
                            old_end_byte: start_byte,                      // same
                            new_end_byte: start_byte + s.len(),            // old_pos + s.len()
                            start_position,                                // old pos to coords
                            old_end_position: start_position,              // same
                            new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over)

                    new_pos += ins;
            // Continue after the old-text span this operation (or replacement) covered.
            old_pos = old_end;

    /// Updates this layer after `changeset` turned `old_source` into `source`.
    ///
    /// An empty changeset is a no-op. Otherwise the edits derived from the
    /// changeset are processed one by one, and the layer is then handled over
    /// an unrestricted range (presumably reparsed via `parse` — confirm).
    fn update(
        &mut self,
        parser: &mut Parser,
        config: &HighlightConfiguration,
        old_source: &Rope,
        source: &Rope,
        changeset: &ChangeSet,
    ) -> Result<(), Error> {
        if changeset.is_empty() {
            return Ok(());

        // Edits are expressed relative to the old text.
        let edits = Self::generate_edits(old_source.slice(..), changeset);

        // Notify the tree about all the changes
        for edit in edits {

            // TODO: what to do about this range on update
            vec![Range {
                start_byte: 0,
                end_byte: usize::MAX,
                start_point: Point::new(0, 0),
                end_point: Point::new(usize::MAX, usize::MAX),

    // fn highlight_iter() -> same as Mode but for this layer. Mode composits these
    // fn buffer_changed
    // fn update(range)
    // fn update_injections()

// -- refactored from tree-sitter-highlight to be able to retain state
// TODO: add seek() to iter

// problem: any time a layer is updated it must update its injections on the parent (potentially
// removing some from use)
// can't modify to vec and exist in it at the same time since that would violate borrows
// maybe we can do with an arena
// maybe just caching on the top layer and nevermind the injections for now?
// Grammar {
//  layers: Vec<Box<Layer>> to prevent memory moves when vec is modified
// }
// injections tracked by marker:
// if marker areas match it's fine and update
// if not found add new layer
// if length 0 then area got removed, clean up the layer
// layer update:
// if range.len = 0 then remove the layer
// for change in changes { tree.edit(change) }
// tree = parser.parse(.., tree, ..)
// calculate affected range and update injections
// injection update:
// look for existing injections
// if present, range = (first injection start, last injection end)
// For now cheat and just throw out non-root layers if they exist. This should still improve
// parsing in majority of cases.

use std::sync::atomic::{AtomicUsize, Ordering};
use std::{iter, mem, ops, str, usize};
use tree_sitter::{
    Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
    QueryMatch, Range, Tree,


/// Indicates which highlight should be applied to a region of source code.
///
/// The wrapped value is an index into the list of recognized highlight names
/// given to `HighlightConfiguration::configure`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Highlight(pub usize);

/// Represents the reason why syntax highlighting failed.
#[derive(Debug, PartialEq, Eq)]
pub enum Error {

/// Represents a single step in rendering a syntax-highlighted document.
#[derive(Copy, Clone, Debug)]
pub enum HighlightEvent {
    /// A span of source text from `start` to `end`
    /// (presumably byte offsets with an exclusive end — confirm).
    Source { start: usize, end: usize },

/// Contains the data needed to highlight code written in a particular language.
/// This struct is immutable and can be shared between threads.
pub struct HighlightConfiguration {
    /// Grammar the queries were compiled against.
    pub language: Grammar,
    /// Single query built from the concatenated query sources.
    pub query: Query,
    /// Separate query for "combined injections"; `None` when no pattern
    /// uses the `injection.combined` property.
    combined_injections_query: Option<Query>,
    /// Number of patterns in `query` that start before the locals section.
    locals_pattern_index: usize,
    /// Number of patterns in `query` that start before the highlights section.
    highlights_pattern_index: usize,
    /// One slot per capture name of `query`, initially `None`
    /// (presumably filled in by `configure` — confirm).
    highlight_indices: Vec<Option<Highlight>>,
    /// One flag per pattern of `query`: whether the pattern is disabled for
    /// nodes identified as local variables.
    non_local_variable_patterns: Vec<bool>,
    /// Capture index of `injection.content`, if the query has it.
    injection_content_capture_index: Option<u32>,
    /// Capture index of `injection.language`, if the query has it.
    injection_language_capture_index: Option<u32>,
    /// Capture index of `local.scope`, if the query has it.
    local_scope_capture_index: Option<u32>,
    /// Capture index of `local.definition`, if the query has it.
    local_def_capture_index: Option<u32>,
    /// Capture index of `local.definition-value`, if the query has it.
    local_def_value_capture_index: Option<u32>,
    /// Capture index of `local.reference`, if the query has it.
    local_ref_capture_index: Option<u32>,

/// A local definition recorded inside a `LocalScope`.
struct LocalDef<'a> {
    /// Name of the definition, borrowed for `'a`.
    name: &'a str,
    /// Range of the definition's value (presumably in bytes — confirm).
    value_range: ops::Range<usize>,
    /// Highlight associated with the definition, if any.
    highlight: Option<Highlight>,

/// A scope on a layer's scope stack, together with the definitions made in it.
struct LocalScope<'a> {
    /// Presumably whether lookups may continue into enclosing scopes — confirm.
    /// The root scope of each layer is created with `false`.
    inherits: bool,
    /// Range covered by the scope; the root scope uses `0..usize::MAX`.
    range: ops::Range<usize>,
    /// Definitions recorded in this scope.
    local_defs: Vec<LocalDef<'a>>,

/// Iterator state behind `Syntax::highlight_iter`.
struct HighlightIter<'a, F>
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
    /// Document text as bytes.
    source: &'a [u8],
    /// Current offset into `source`; starts at the requested range's start, or 0.
    byte_offset: usize,
    /// The `Syntax` the iterator was created from.
    highlighter: &'a mut Syntax,
    /// Resolves a string (presumably an injected language's name) to a configuration.
    injection_callback: F,
    /// Optional cancellation flag supplied by the caller.
    cancellation_flag: Option<&'a AtomicUsize>,
    /// Active layers; starts with just the root layer.
    layers: Vec<HighlightIterLayer<'a>>,
    /// Counter that starts at 0 (presumably counts iteration steps — confirm).
    iter_count: usize,
    /// Pending event, if any; starts as `None`.
    next_event: Option<HighlightEvent>,
    /// Starts as `None`; presumably (start, end, depth) of the last highlight — confirm.
    last_highlight_range: Option<(usize, usize, usize)>,

/// One layer of highlighting: a capture stream over one tree plus the
/// bookkeeping needed to turn captures into highlight boundaries.
struct HighlightIterLayer<'a> {
    /// Tree owned by the layer when it parsed one itself; `None` for the root
    /// layer, which borrows the tree held by `Syntax`.
    _tree: Option<Tree>,
    /// Query cursor that `captures` was created from.
    cursor: QueryCursor,
    /// Peekable stream of query captures.
    captures: iter::Peekable<QueryCaptures<'a, &'a [u8]>>,
    /// Configuration whose query produced `captures`.
    config: &'a HighlightConfiguration,
    /// Stack of end offsets; `sort_key` reads the last entry as the next end.
    highlight_end_stack: Vec<usize>,
    /// Stack of local scopes; starts with one root scope.
    scope_stack: Vec<LocalScope<'a>>,
    /// Ranges this layer covers.
    ranges: Vec<Range>,
    /// Nesting depth: 0 for the root layer, parent depth + 1 for injections.
    depth: usize,

impl HighlightConfiguration {
    /// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting
    /// queries.
    ///
    /// # Parameters
    ///
    /// * `language`  - The Tree-sitter `Grammar` that should be used for parsing.
    /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
    ///   should be non-empty, otherwise no syntax highlights will be added.
    /// * `injection_query` -  A string containing tree patterns for injecting other languages
    ///   into the document. This can be empty if no injections are desired.
    /// * `locals_query` - A string containing tree patterns for tracking local variable
    ///   definitions and references. This can be empty if local variable tracking is not needed.
    ///
    /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
    ///
    /// # Errors
    ///
    /// Returns the `QueryError` from `Query::new` if a query fails to compile.
    pub fn new(
        language: Grammar,
        highlights_query: &str,
        injection_query: &str,
        locals_query: &str,
    ) -> Result<Self, QueryError> {
        // Concatenate the query strings, keeping track of the start offset of each section.
        let mut query_source = String::new();
        let locals_query_offset = query_source.len();
        let highlights_query_offset = query_source.len();

        // Construct a single query by concatenating the three query strings, but record the
        // range of pattern indices that belong to each individual string.
        let mut query = Query::new(language, &query_source)?;
        let mut locals_pattern_index = 0;
        let mut highlights_pattern_index = 0;
        // Count how many patterns start before each section's offset.
        for i in 0..(query.pattern_count()) {
            let pattern_offset = query.start_byte_for_pattern(i);
            if pattern_offset < highlights_query_offset {
                if pattern_offset < highlights_query_offset {
                    highlights_pattern_index += 1;
                if pattern_offset < locals_query_offset {
                    locals_pattern_index += 1;

        // Construct a separate query just for dealing with the 'combined injections'.
        // Disable the combined injection patterns in the main query.
        let mut combined_injections_query = Query::new(language, injection_query)?;
        let mut has_combined_queries = false;
        // Only patterns before the locals section are checked for `injection.combined`.
        for pattern_index in 0..locals_pattern_index {
            let settings = query.property_settings(pattern_index);
            if settings.iter().any(|s| &*s.key == "injection.combined") {
                has_combined_queries = true;
            } else {
        let combined_injections_query = if has_combined_queries {
        } else {

        // Find all of the highlighting patterns that are disabled for nodes that
        // have been identified as local variables.
        let non_local_variable_patterns = (0..query.pattern_count())
            .map(|i| {
                    // A negated `local` property marks such a pattern.
                    .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")

        // Store the numeric ids for all of the special captures.
        let mut injection_content_capture_index = None;
        let mut injection_language_capture_index = None;
        let mut local_def_capture_index = None;
        let mut local_def_value_capture_index = None;
        let mut local_ref_capture_index = None;
        let mut local_scope_capture_index = None;
        for (i, name) in query.capture_names().iter().enumerate() {
            let i = Some(i as u32);
            match name.as_str() {
                "injection.content" => injection_content_capture_index = i,
                "injection.language" => injection_language_capture_index = i,
                "local.definition" => local_def_capture_index = i,
                "local.definition-value" => local_def_value_capture_index = i,
                "local.reference" => local_ref_capture_index = i,
                "local.scope" => local_scope_capture_index = i,
                _ => {}

        // No capture is mapped to a highlight at construction time.
        let highlight_indices = vec![None; query.capture_names().len()];
        Ok(HighlightConfiguration {

    /// Get a slice containing all of the highlight names used in the configuration.
    pub fn names(&self) -> &[String] {

    /// Set the list of recognized highlight names.
    ///
    /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
    /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
    /// these queries can choose to recognize highlights with different levels of specificity.
    /// For example, the string `function.builtin` will match against `function.method.builtin`
    /// and `function.builtin.constructor`, but will not match `function.method`.
    ///
    /// When highlighting, results are returned as `Highlight` values, which contain the index
    /// of the matched highlight in this list of highlight names.
    pub fn configure(&mut self, recognized_names: &[String]) {
        let mut capture_parts = Vec::new();
            .extend(self.query.capture_names().iter().map(move |capture_name| {

                // Pick the recognized name with the most dot-separated parts
                // among those whose parts all occur in `capture_parts`. Ties
                // keep the earliest candidate (strict `>`).
                let mut best_index = None;
                let mut best_match_len = 0;
                for (i, recognized_name) in recognized_names.iter().enumerate() {
                    let mut len = 0;
                    let mut matches = true;
                    for part in recognized_name.split('.') {
                        len += 1;
                        if !capture_parts.contains(&part) {
                            matches = false;
                    if matches && len > best_match_len {
                        best_index = Some(i);
                        best_match_len = len;

impl<'a> HighlightIterLayer<'a> {
    /// Create a new 'layer' of highlighting for this document.
    ///
    /// In the event that the new layer contains "combined injections" (injections where multiple
    /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
    /// added to the returned vector.
    fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
        source: &'a [u8],
        highlighter: &mut Syntax,
        cancellation_flag: Option<&'a AtomicUsize>,
        injection_callback: &mut F,
        mut config: &'a HighlightConfiguration,
        mut depth: usize,
        mut ranges: Vec<Range>,
    ) -> Result<Vec<Self>, Error> {
        let mut result = Vec::with_capacity(1);
        // Work list of (config, depth, ranges) for combined injections found
        // while processing a layer; entries are taken from the front.
        let mut queue = Vec::new();
        loop {
            // --> Tree parsing part

            if highlighter.parser.set_included_ranges(&ranges).is_ok() {
                    .map_err(|_| Error::InvalidLanguage)?;

                // The cancellation flag is installed only for the duration of the parse.
                unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) };
                let tree = highlighter
                    .parse(source, None)
                unsafe { highlighter.parser.set_cancellation_flag(None) };
                // Take a cursor from the pool, or create one if the pool is empty.
                let mut cursor = highlighter.cursors.pop().unwrap_or_else(QueryCursor::new);

                // Process combined injections.
                if let Some(combined_injections_query) = &config.combined_injections_query {
                    // Per pattern: (language name, content nodes, include-children flag).
                    let mut injections_by_pattern_index =
                        vec![(None, Vec::new(), false); combined_injections_query.pattern_count()];
                    let matches =
                        cursor.matches(combined_injections_query, tree.root_node(), |n: Node| {
                    for mat in matches {
                        let entry = &mut injections_by_pattern_index[mat.pattern_index];
                        let (language_name, content_node, include_children) =
                            injection_for_match(config, combined_injections_query, &mat, source);
                        if language_name.is_some() {
                            entry.0 = language_name;
                        if let Some(content_node) = content_node {
                        entry.2 = include_children;
                    for (lang_name, content_nodes, includes_children) in injections_by_pattern_index
                        // Only patterns with a language name and at least one content node qualify.
                        if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
                            if let Some(next_config) = (injection_callback)(lang_name) {
                                let ranges = Self::intersect_ranges(
                                if !ranges.is_empty() {
                                    queue.push((next_config, depth + 1, ranges));

                // --> Highlighting query part

                // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
                // prevents them from being moved. But both of these values are really just
                // pointers, so it's actually ok to move them.
                let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) };
                let cursor_ref =
                    unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
                let captures = cursor_ref
                    .captures(&config.query, tree_ref.root_node(), move |n: Node| {

                result.push(HighlightIterLayer {
                    highlight_end_stack: Vec::new(),
                    scope_stack: vec![LocalScope {
                        inherits: false,
                        range: 0..usize::MAX,
                        local_defs: Vec::new(),
                    // The layer owns the tree it just parsed.
                    _tree: Some(tree),

            if queue.is_empty() {
            } else {
                // Continue with the oldest queued injection.
                let (next_config, next_depth, next_ranges) = queue.remove(0);
                config = next_config;
                depth = next_depth;
                ranges = next_ranges;


    // Compute the ranges that should be included when parsing an injection.
    // This takes into account three things:
    // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
    // * `nodes` - Every injection takes place within a set of nodes. The injection ranges
    //   are the ranges of those nodes.
    // * `includes_children` - For some injections, the content nodes' children should be
    //   excluded from the nested document, so that only the content nodes' *own* content
    //   is reparsed. For other injections, the content nodes' entire ranges should be
    //   reparsed, including the ranges of their children.
    //
    // Panics if `nodes` is empty (`nodes[0]`) or if `parent_ranges` is empty (the `expect`).
    fn intersect_ranges(
        parent_ranges: &[Range],
        nodes: &[Node],
        includes_children: bool,
    ) -> Vec<Range> {
        let mut cursor = nodes[0].walk();
        let mut result = Vec::new();
        let mut parent_range_iter = parent_ranges.iter();
        let mut parent_range = parent_range_iter
            .expect("Layers should only be constructed with non-empty ranges vectors");
        for node in nodes.iter() {
            // Sentinel range ending where the node starts; it seeds the
            // gap computation below.
            let mut preceding_range = Range {
                start_byte: 0,
                start_point: Point::new(0, 0),
                end_byte: node.start_byte(),
                end_point: node.start_position(),
            // Sentinel range starting where the node ends.
            let following_range = Range {
                start_byte: node.end_byte(),
                start_point: node.end_position(),
                end_byte: usize::MAX,
                end_point: Point::new(usize::MAX, usize::MAX),

            for excluded_range in node
                .children(&mut cursor)
                .filter_map(|child| {
                    if includes_children {
                    } else {
                // Candidate range: the gap between the previous excluded range
                // and the current one.
                let mut range = Range {
                    start_byte: preceding_range.end_byte,
                    start_point: preceding_range.end_point,
                    end_byte: excluded_range.start_byte,
                    end_point: excluded_range.start_point,
                preceding_range = excluded_range;

                if range.end_byte < parent_range.start_byte {

                // Clip the candidate against each parent range it overlaps.
                while parent_range.start_byte <= range.end_byte {
                    if parent_range.end_byte > range.start_byte {
                        if range.start_byte < parent_range.start_byte {
                            range.start_byte = parent_range.start_byte;
                            range.start_point = parent_range.start_point;

                        if parent_range.end_byte < range.end_byte {
                            // The candidate extends past this parent range: emit the
                            // overlapping part and carry the rest forward.
                            if range.start_byte < parent_range.end_byte {
                                result.push(Range {
                                    start_byte: range.start_byte,
                                    start_point: range.start_point,
                                    end_byte: parent_range.end_byte,
                                    end_point: parent_range.end_point,
                            range.start_byte = parent_range.end_byte;
                            range.start_point = parent_range.end_point;
                        } else {
                            if range.start_byte < range.end_byte {

                    // Move to the next parent range; with none left, nothing
                    // further can be included.
                    if let Some(next_range) = parent_range_iter.next() {
                        parent_range = next_range;
                    } else {
                        return result;

    // First, sort scope boundaries by their byte offset in the document. At a
    // given position, emit scope endings before scope beginnings. Finally, emit
    // scope boundaries from deeper layers first.
    //
    // The key has the form `(byte offset, is_start, -depth)`. `None` is produced
    // when there is neither a pending start nor a pending end.
    fn sort_key(&mut self) -> Option<(usize, bool, isize)> {
        // Negated so that a larger depth yields a smaller key component.
        let depth = -(self.depth as isize);
        // Start byte of the node belonging to the next (match, capture index) pair,
        // when there is one.
        let next_start = self
            .map(|(m, i)| m.captures[*i].node.start_byte());
        // The value on top of the highlight end stack, when there is one.
        let next_end = self.highlight_end_stack.last().cloned();
        match (next_start, next_end) {
            (Some(start), Some(end)) => {
                // A start is reported only when it lies strictly before the pending
                // end; on a tie the end is reported.
                if start < end {
                    Some((start, true, depth))
                } else {
                    Some((end, false, depth))
            (Some(i), None) => Some((i, true, depth)),
            (None, Some(j)) => Some((j, false, depth)),
            _ => None,

// Internal helpers of the highlight iterator: producing events and keeping
// `layers` ordered by each layer's `sort_key`.
impl<'a, F> HighlightIter<'a, F>
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
    // Prepares the event for position `offset`.
    //
    // * `offset` - Byte position at which `event` takes effect.
    // * `event` - The event to deliver, if any.
    //
    // When the iterator has not yet reached `offset`, `result` becomes a `Source`
    // event covering `byte_offset..offset`, `byte_offset` is advanced, and `event`
    // is parked in `next_event`. Otherwise `result` is `event` itself wrapped in `Ok`.
    fn emit_event(
        &mut self,
        offset: usize,
        event: Option<HighlightEvent>,
    ) -> Option<Result<HighlightEvent, Error>> {
        let result;
        if self.byte_offset < offset {
            result = Some(Ok(HighlightEvent::Source {
                start: self.byte_offset,
                end: offset,
            self.byte_offset = offset;
            self.next_event = event;
        } else {
            result = event.map(Ok);

    // Re-positions the front layer after its sort key may have changed.
    //
    // `i` counts how many of the following layers have a smaller key than the
    // front layer; rotating `layers[0..=i]` left by one then moves the front layer
    // to index `i`. A front layer that has no sort key is removed from `layers`.
    fn sort_layers(&mut self) {
        while !self.layers.is_empty() {
            if let Some(sort_key) = self.layers[0].sort_key() {
                let mut i = 0;
                while i + 1 < self.layers.len() {
                    if let Some(next_offset) = self.layers[i + 1].sort_key() {
                        if next_offset < sort_key {
                            i += 1;
                if i > 0 {
                    self.layers[0..(i + 1)].rotate_left(1);
            } else {
                let layer = self.layers.remove(0);

    // Adds `layer` to `layers`, provided it has a sort key.
    //
    // The scan starts at index 1 (index 0 is never displaced here) and inserts the
    // new layer in front of the first existing layer whose key is greater.
    // NOTE(review): what happens when an existing layer has no sort key, or when no
    // greater key is found, is not visible in this listing.
    fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) {
        if let Some(sort_key) = layer.sort_key() {
            let mut i = 1;
            while i < self.layers.len() {
                if let Some(sort_key_i) = self.layers[i].sort_key() {
                    if sort_key_i > sort_key {
                        self.layers.insert(i, layer);
                    i += 1;
                } else {

// Iterator implementation: every call to `next` yields one `HighlightEvent`
// or an error.
impl<'a, F> Iterator for HighlightIter<'a, F>
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
    type Item = Result<HighlightEvent, Error>;

    // Yields `Source`, `HighlightStart` and `HighlightEnd` events in document order,
    // or `Err(Error::Cancelled)` once the cancellation flag is found to be non-zero.
    // Captures that do not result in an event (injections, local-variable
    // bookkeeping, skipped highlights) send control back to the top of `'main`.
    fn next(&mut self) -> Option<Self::Item> {
        'main: loop {
            // If we've already determined the next highlight boundary, just return it.
            if let Some(e) = self.next_event.take() {
                return Some(Ok(e));

            // Periodically check for cancellation, returning `Cancelled` error if the
            // cancellation flag was flipped.
            if let Some(cancellation_flag) = self.cancellation_flag {
                self.iter_count += 1;
                if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
                    self.iter_count = 0;
                    if cancellation_flag.load(Ordering::Relaxed) != 0 {
                        return Some(Err(Error::Cancelled));

            // If none of the layers have any more highlight boundaries, terminate.
            if self.layers.is_empty() {
                // Whatever part of the source has not been covered yet goes out as
                // one last `Source` event.
                return if self.byte_offset < self.source.len() {
                    let result = Some(Ok(HighlightEvent::Source {
                        start: self.byte_offset,
                        end: self.source.len(),
                    self.byte_offset = self.source.len();
                } else {

            // Get the next capture from whichever layer has the earliest highlight boundary.
            let range;
            let layer = &mut self.layers[0];
            if let Some((next_match, capture_index)) = layer.captures.peek() {
                let next_capture = next_match.captures[*capture_index];
                range = next_capture.node.byte_range();

                // If any previous highlight ends before this node starts, then before
                // processing this capture, emit the source code up until the end of the
                // previous highlight, and an end event for that highlight.
                if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
                    // `<=`: a highlight ending exactly where this node starts is
                    // closed before the node is processed.
                    if end_byte <= range.start {
                        return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
            // If there are no more captures, then emit any remaining highlight end events.
            // And if there are none of those, then just advance to the end of the document.
            else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
                return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
            } else {
                // return self.emit_event(self.source.len(), None);
                return None;

            // The capture was only peeked at above; consume it now.
            let (mut match_, capture_index) = layer.captures.next().unwrap();
            let mut capture = match_.captures[capture_index];

            // If this capture represents an injection, then process the injection.
            // (Pattern indices below `locals_pattern_index` are the injection patterns.)
            if match_.pattern_index < layer.config.locals_pattern_index {
                let (language_name, content_node, include_children) =
                    injection_for_match(&layer.config, &layer.config.query, &match_, &self.source);

                // Explicitly remove this match so that none of its other captures will remain
                // in the stream of captures.

                // If a language is found with the given name, then add a new language layer
                // to the highlighted document.
                if let (Some(language_name), Some(content_node)) = (language_name, content_node) {
                    if let Some(config) = (self.injection_callback)(language_name) {
                        let ranges = HighlightIterLayer::intersect_ranges(
                        if !ranges.is_empty() {
                            match HighlightIterLayer::new(
                                &mut self.injection_callback,
                                // Injected layers sit one level deeper than the
                                // current front layer.
                                self.layers[0].depth + 1,
                            ) {
                                Ok(layers) => {
                                    for layer in layers {
                                Err(e) => return Some(Err(e)),

                // An injection match yields no event of its own.
                continue 'main;

            // Remove from the local scope stack any local scopes that have already ended.
            // NOTE(review): the `unwrap` relies on the scope stack never being empty -
            // confirm that invariant where the layer is constructed.
            while range.start > layer.scope_stack.last().unwrap().range.end {

            // If this capture is for tracking local variables, then process the
            // local variable info.
            let mut reference_highlight = None;
            let mut definition_highlight = None;
            // Injection patterns were handled above, so a pattern index below
            // `highlights_pattern_index` here denotes a local-variable pattern.
            while match_.pattern_index < layer.config.highlights_pattern_index {
                // If the node represents a local scope, push a new local scope onto
                // the scope stack.
                if Some(capture.index) == layer.config.local_scope_capture_index {
                    definition_highlight = None;
                    let mut scope = LocalScope {
                        inherits: true,
                        range: range.clone(),
                        local_defs: Vec::new(),
                    // `inherits` stays true unless a `local.scope-inherits` property
                    // carries a value other than "true".
                    for prop in layer.config.query.property_settings(match_.pattern_index) {
                        if let "local.scope-inherits" = prop.key.as_ref() {
                            scope.inherits =
                                prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
                // If the node represents a definition, add a new definition to the
                // local scope at the top of the scope stack.
                else if Some(capture.index) == layer.config.local_def_capture_index {
                    reference_highlight = None;
                    definition_highlight = None;
                    let scope = layer.scope_stack.last_mut().unwrap();

                    // Byte range of the definition's value capture; stays empty
                    // (`0..0`) when the match has no such capture.
                    let mut value_range = 0..0;
                    for capture in match_.captures {
                        if Some(capture.index) == layer.config.local_def_value_capture_index {
                            value_range = capture.node.byte_range();

                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
                        scope.local_defs.push(LocalDef {
                            highlight: None,
                        // Keep a handle on the new definition's `highlight` slot so it
                        // can be filled in once the node's highlight is known.
                        definition_highlight =
                            scope.local_defs.last_mut().map(|s| &mut s.highlight);
                // If the node represents a reference, then try to find the corresponding
                // definition in the scope stack.
                else if Some(capture.index) == layer.config.local_ref_capture_index
                    && definition_highlight.is_none()
                    definition_highlight = None;
                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
                        // Innermost scope first; within a scope, latest definition first.
                        for scope in layer.scope_stack.iter().rev() {
                            if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
                                // Only a same-named definition whose value range ends
                                // at or before this reference qualifies.
                                if def.name == name && range.start >= def.value_range.end {
                                } else {
                            }) {
                                reference_highlight = highlight;
                            // NOTE(review): presumably a non-inheriting scope ends the
                            // outward search; the branch body is not visible here.
                            if !scope.inherits {

                // Continue processing any additional matches for the same node.
                if let Some((next_match, next_capture_index)) = layer.captures.peek() {
                    let next_capture = next_match.captures[*next_capture_index];
                    if next_capture.node == capture.node {
                        capture = next_capture;
                        match_ = layer.captures.next().unwrap().0;

                continue 'main;

            // Otherwise, this capture must represent a highlight.
            // If this exact range has already been highlighted by an earlier pattern, or by
            // a different layer, then skip over this one.
            if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
                // The skip applies only when this layer is shallower than the layer
                // that produced the previous highlight for the same range.
                if range.start == last_start && range.end == last_end && layer.depth < last_depth {
                    continue 'main;

            // If the current node was found to be a local variable, then skip over any
            // highlighting patterns that are disabled for local variables.
            if definition_highlight.is_some() || reference_highlight.is_some() {
                while layer.config.non_local_variable_patterns[match_.pattern_index] {
                    if let Some((next_match, next_capture_index)) = layer.captures.peek() {
                        let next_capture = next_match.captures[*next_capture_index];
                        if next_capture.node == capture.node {
                            capture = next_capture;
                            match_ = layer.captures.next().unwrap().0;

                    continue 'main;

            // Once a highlighting pattern is found for the current node, skip over
            // any later highlighting patterns that also match this node. Captures
            // for a given node are ordered by pattern index, so these subsequent
            // captures are guaranteed to be for highlighting, not injections or
            // local variables.
            while let Some((next_match, next_capture_index)) = layer.captures.peek() {
                let next_capture = next_match.captures[*next_capture_index];
                if next_capture.node == capture.node {
                } else {

            // The highlight configured for this capture index, if any.
            let current_highlight = layer.config.highlight_indices[capture.index as usize];

            // If this node represents a local definition, then store the current
            // highlight value on the local scope entry representing this node.
            if let Some(definition_highlight) = definition_highlight {
                *definition_highlight = current_highlight;

            // Emit a scope start event and push the node's end position to the stack.
            // A highlight taken from a referenced definition wins over the capture's own.
            if let Some(highlight) = reference_highlight.or(current_highlight) {
                self.last_highlight_range = Some((range.start, range.end, layer.depth));
                return self
                    .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));


// Extracts the injection details carried by a single query match.
//
// * `config` - Supplies the capture indices that mark the injected language and
//   the injected content.
// * `query` - The query the match belongs to; its property settings are consulted.
// * `query_match` - The match to inspect.
// * `source` - Source bytes, used to read the text of the language capture.
//
// Returns `(language_name, content_node, include_children)`. Either of the first
// two is `None` when the match does not provide it.
fn injection_for_match<'a>(
    config: &HighlightConfiguration,
    query: &'a Query,
    query_match: &QueryMatch<'a>,
    source: &'a [u8],
) -> (Option<&'a str>, Option<Node<'a>>, bool) {
    let content_capture_index = config.injection_content_capture_index;
    let language_capture_index = config.injection_language_capture_index;

    let mut language_name = None;
    let mut content_node = None;
    // Pick out the captures designated as injection language and injection content.
    for capture in query_match.captures {
        let index = Some(capture.index);
        if index == language_capture_index {
            // Text that is not valid UTF-8 leaves the language name unset (`.ok()`).
            language_name = capture.node.utf8_text(source).ok();
        } else if index == content_capture_index {
            content_node = Some(capture.node);

    let mut include_children = false;
    for prop in query.property_settings(query_match.pattern_index) {
        match prop.key.as_ref() {
            // In addition to specifying the language name via the text of a
            // captured node, it can also be hard-coded via a `#set!` predicate
            // that sets the injection.language key.
            "injection.language" => {
                // A name already captured from the source text takes priority.
                if language_name.is_none() {
                    language_name = prop.value.as_ref().map(|s| s.as_ref())

            // By default, injections do not include the *children* of an
            // `injection.content` node - only the ranges that belong to the
            // node itself. This can be changed using a `#set!` predicate that
            // sets the `injection.include-children` key.
            // The presence of the key is enough; its value is not inspected.
            "injection.include-children" => include_children = true,
            _ => {}

    (language_name, content_node, include_children)

/// Empties `vec` while bounding how much memory it keeps allocated.
///
/// * `vec` - The buffer to reset; it is always empty when this returns.
/// * `capacity` - The number of elements worth keeping storage for. A buffer
///   holding more elements than this has its surplus allocation released;
///   a smaller buffer keeps its allocation untouched so it can be reused.
fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
    if vec.len() > capacity {
        // `shrink_to_fit` shrinks towards the current length, so cut the length
        // down to the budget first.
        vec.truncate(capacity);
        vec.shrink_to_fit();
    }
    vec.clear();
}

// Builds a highlight configuration for Rust, parses a two-item snippet with
// `Syntax::new`, and checks the kinds of the resulting tree's nodes.
fn test_parser() {
    let highlight_names: Vec<String> = [

    let language = get_language(LANG::Rust);
    let mut config = HighlightConfiguration::new(
        "", // locals.scm

    let source = Rope::from_str(
        struct Stuff {}
        fn main() {}
    let syntax = Syntax::new(&source, Arc::new(config));
    // `unwrap`: the test expects the root layer to hold a tree after construction.
    let tree = syntax.root_layer.tree.unwrap();
    let root = tree.root_node();
    assert_eq!(root.kind(), "source_file");

            "(source_file ",
            "(struct_item name: (type_identifier) body: (field_declaration_list)) ",
            "(function_item name: (identifier) parameters: (parameters) body: (block)))"

    // The root's first child is expected to be the struct.
    let struct_node = root.child(0).unwrap();
    assert_eq!(struct_node.kind(), "struct_item");

// Checks the `InputEdit` values that `LanguageLayer::generate_edits` derives
// from a transaction's changes.
fn test_input_edits() {
    use crate::State;
    use tree_sitter::InputEdit;

    // Two changes on a two-line document. Judging by the expected edits below, each
    // tuple is presumably (from, to, replacement): bytes 6..11 become "test" and
    // bytes 12..17 are removed.
    let mut state = State::new("hello world!\ntest 123".into());
    let transaction = Transaction::change(
        vec![(6, 11, Some("test".into())), (12, 17, None)].into_iter(),
    let edits = LanguageLayer::generate_edits(state.doc.slice(..), &transaction.changes);
    // transaction.apply(&mut state);

            // Replacing 6..11 with the 4-byte "test" moves the end from byte 11 to byte 10.
            InputEdit {
                start_byte: 6,
                old_end_byte: 11,
                new_end_byte: 10,
                start_position: Point { row: 0, column: 6 },
                old_end_position: Point { row: 0, column: 11 },
                new_end_position: Point { row: 0, column: 10 }
            // The removed span 12..17 crosses the newline: its old end lies on row 1,
            // while its new end coincides with its start.
            InputEdit {
                start_byte: 12,
                old_end_byte: 17,
                new_end_byte: 12,
                start_position: Point { row: 0, column: 12 },
                old_end_position: Point { row: 1, column: 4 },
                new_end_position: Point { row: 0, column: 12 }

    // Testing with the official example from tree-sitter
    let mut state = State::new("fn test() {}".into());
    let transaction = Transaction::change(&state, vec![(8, 8, Some("a: u32".into()))].into_iter());
    let edits = LanguageLayer::generate_edits(state.doc.slice(..), &transaction.changes);
    transaction.apply(&mut state);

    assert_eq!(state.doc(), "fn test(a: u32) {}");
        // Inserting the 6-byte "a: u32" at byte 8 keeps the old end at 8 and puts
        // the new end at 14.
        &[InputEdit {
            start_byte: 8,
            old_end_byte: 8,
            new_end_byte: 14,
            start_position: Point { row: 0, column: 8 },
            old_end_position: Point { row: 0, column: 8 },
            new_end_position: Point { row: 0, column: 14 }