From eb639eb2e4610ed2b440c8d95217f125005288fd Mon Sep 17 00:00:00 2001 From: Blaž Hrastnik Date: Tue, 22 Sep 2020 18:23:48 +0900 Subject: More robust syntax detection/grammar loading. --- .gitmodules | 4 + Cargo.lock | 30 ++-- TODO.md | 5 +- helix-core/Cargo.toml | 1 + helix-core/src/lib.rs | 1 - helix-core/src/state.rs | 31 ++--- helix-core/src/syntax.rs | 237 +++++++++++++++++++++----------- helix-syntax/languages/tree-sitter-toml | 1 + helix-syntax/src/lib.rs | 1 + helix-term/src/editor.rs | 27 ++-- helix-view/src/view.rs | 3 +- 11 files changed, 203 insertions(+), 138 deletions(-) create mode 160000 helix-syntax/languages/tree-sitter-toml diff --git a/.gitmodules b/.gitmodules index 70fac3b6..f4d6456c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -78,3 +78,7 @@ path = helix-syntax/languages/tree-sitter-swift url = https://github.com/tree-sitter/tree-sitter-swift shallow = true +[submodule "helix-syntax/languages/tree-sitter-toml"] + path = helix-syntax/languages/tree-sitter-toml + url = https://github.com/ikatyang/tree-sitter-toml + shallow = true diff --git a/Cargo.lock b/Cargo.lock index f37c951c..def2673a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,15 +73,16 @@ dependencies = [ [[package]] name = "async-executor" -version = "1.1.1" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a831e74aa1937d3bbd3a356f34c23dbc6b6f0abc5160bd5484a9f75d5e76aea8" +checksum = "d373d78ded7d0b3fa8039375718cde0aace493f2e34fb60f51cbf567562ca801" dependencies = [ "async-task", "concurrent-queue", "fastrand", "futures-lite", "once_cell", + "vec-arena", ] [[package]] @@ -135,12 +136,13 @@ dependencies = [ [[package]] name = "async-net" -version = "1.3.0" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a48af5438be856056bdeb6c5d895148a715be5915fccee49d1e5b50851dc9b8b" +checksum = "fb04482b77baa38d59d56aee0a7b4266600ab28e2b8be7af03508f6a30ecbdcf" dependencies = [ "async-io", "blocking", + 
"fastrand", "futures-lite", ] @@ -162,9 +164,9 @@ dependencies = [ [[package]] name = "async-rwlock" -version = "1.1.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f8978b5ae008b5177da07a1bf1bfbe428f9bdb970c3fca0e92ed1c1930d7f34" +checksum = "806b1cc0828c2b1611ccbdd743fc0cc7af09009e62c95a0501c1e5da7b142a22" dependencies = [ "async-mutex", "event-listener", @@ -181,9 +183,9 @@ dependencies = [ [[package]] name = "async-task" -version = "3.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17772156ef2829aadc587461c7753af20b7e8db1529bc66855add962a3b35d3" +checksum = "6725e96011a83fae25074a8734932e8d67763522839be7473dcfe8a0d6a378b1" [[package]] name = "atomic-waker" @@ -428,9 +430,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9" +checksum = "4c30f6d0bc6b00693347368a67d41b58f2fb851215ff1da49e90fe2c5c667151" dependencies = [ "libc", ] @@ -615,9 +617,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "polling" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0307b8c7f438902536321f63c28cab0362f6ee89f1c7da47e3642ff956641c8b" +checksum = "e0720e0b9ea9d52451cf29d3413ba8a9303f8815d9d9653ef70e03ff73e65566" dependencies = [ "cfg-if", "libc", @@ -712,9 +714,9 @@ checksum = "fbee7696b84bbf3d89a1c2eccff0850e3047ed46bfcd2e92c29a2d074d57e252" [[package]] name = "smol" -version = "1.0.1" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712d02afa6ac9e7b8c777fd181aff476d009280b54b8c28703d10fa5d7e80d83" +checksum = "d41237ba3e3ada55ff3515d37becc8fa90e5e4af2b13a011ec3f932d9f1b2405" dependencies = [ "async-channel", "async-executor", diff 
--git a/TODO.md b/TODO.md index ed1484c4..e6b5efd1 100644 --- a/TODO.md +++ b/TODO.md @@ -1,5 +1,6 @@ -- Implement backspace/delete -- Implement marks +helper methods: iterate over selection spans in the document. + +- Implement marks (superset of Selection/Range) - Implement style configs, tab settings - Visual tab width - Refactor tree-sitter-highlight to work like the atom one, recomputing partial tree updates. diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml index 0d3bd4a4..6a4b09e5 100644 --- a/helix-core/Cargo.toml +++ b/helix-core/Cargo.toml @@ -17,3 +17,4 @@ unicode-segmentation = "1.6.0" unicode-width = "0.1.7" # slab = "0.4.2" tree-sitter = "0.16.1" +once_cell = "1.4.1" diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index e443168e..e97c16be 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -1,5 +1,4 @@ #![allow(unused)] -pub mod config; pub mod graphemes; pub mod macros; mod position; diff --git a/helix-core/src/state.rs b/helix-core/src/state.rs index 79e15eff..4b610207 100644 --- a/helix-core/src/state.rs +++ b/helix-core/src/state.rs @@ -1,4 +1,5 @@ use crate::graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary, RopeGraphemes}; +use crate::syntax::LOADER; use crate::{Position, Range, Rope, RopeSlice, Selection, Syntax}; use anyhow::Error; @@ -48,7 +49,8 @@ impl State { } } - pub fn load(path: PathBuf) -> Result<Self, Error> { + // TODO: passing scopes here is awkward + pub fn load(path: PathBuf, scopes: &[String]) -> Result<Self, Error> { use std::{env, fs::File, io::BufReader, path::PathBuf}; let _current_dir = env::current_dir()?; @@ -57,30 +59,17 @@ impl State { // TODO: create if not found let mut state = Self::new(doc); - state.path = Some(path); - - let language = helix_syntax::get_language(&helix_syntax::LANG::Rust); - let mut highlight_config = crate::syntax::HighlightConfiguration::new( - language, - &std::fs::read_to_string( - "../helix-syntax/languages/tree-sitter-rust/queries/highlights.scm", - ) - .unwrap(), - 
&std::fs::read_to_string( - "../helix-syntax/languages/tree-sitter-rust/queries/injections.scm", - ) - .unwrap(), - "", // locals.scm - ) - .unwrap(); + if let Some(language_config) = LOADER.language_config_for_file_name(path.as_path()) { + let highlight_config = language_config.highlight_config(scopes).unwrap().unwrap(); + // TODO: config.configure(scopes) is now delayed, is that ok? - // TODO: config.configure(scopes) is now delayed, is that ok? + let syntax = Syntax::new(&state.doc, highlight_config.clone()); - // TODO: get_language is called twice - let syntax = Syntax::new(helix_syntax::LANG::Rust, &state.doc, highlight_config); + state.syntax = Some(syntax); + }; - state.syntax = Some(syntax); + state.path = Some(path); Ok(state) } diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 8b55fc3e..26897ab3 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -2,21 +2,161 @@ use crate::{Change, Rope, RopeSlice, Transaction}; pub use helix_syntax::LANG; pub use helix_syntax::{get_language, get_language_name}; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use once_cell::sync::OnceCell; + +// largely based on tree-sitter/cli/src/loader.rs +pub struct LanguageConfiguration { + pub(crate) scope: String, // source.rust + pub(crate) file_types: Vec<String>, // filename ends_with? 
+ + pub(crate) path: PathBuf, + + // content_regex + // injection_regex + // first_line_regex + // + // root_path + // + pub(crate) language_id: LANG, + pub(crate) highlight_config: OnceCell<Option<Arc<HighlightConfiguration>>>, + // tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583 +} + +impl LanguageConfiguration { + pub fn highlight_config( + &self, + scopes: &[String], + ) -> Result<Option<&Arc<HighlightConfiguration>>, anyhow::Error> { + self.highlight_config + .get_or_try_init(|| { + // let name = get_language_name(&self.language_id); + + let highlights_query = + std::fs::read_to_string(self.path.join("queries/highlights.scm")) + .unwrap_or(String::new()); + + let injections_query = + std::fs::read_to_string(self.path.join("queries/injections.scm")) + .unwrap_or(String::new()); + + let locals_query = ""; + + if highlights_query.is_empty() { + Ok(None) + } else { + let language = get_language(&self.language_id); + let mut config = HighlightConfiguration::new( + language, + &highlights_query, + &injections_query, + &locals_query, + ) + .unwrap(); // TODO: no unwrap + config.configure(&scopes); + Ok(Some(Arc::new(config))) + } + }) + .map(Option::as_ref) + } +} + +use once_cell::sync::Lazy; + +pub(crate) static LOADER: Lazy<Loader> = Lazy::new(|| Loader::init()); + +pub struct Loader { + // highlight_names ? 
+ language_configs: Vec<Arc<LanguageConfiguration>>, + language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize> +} + +impl Loader { + fn init() -> Loader { + let mut loader = Loader { + language_configs: Vec::new(), + language_config_ids_by_file_type: HashMap::new(), + }; + + // hardcoded from now, might load from toml + let configs = vec![ + LanguageConfiguration { + scope: "source.rust".to_string(), + file_types: vec!["rs".to_string()], + language_id: LANG::Rust, + highlight_config: OnceCell::new(), + // + path: "../helix-syntax/languages/tree-sitter-rust".into(), + }, + LanguageConfiguration { + scope: "source.toml".to_string(), + file_types: vec!["toml".to_string()], + language_id: LANG::Toml, + highlight_config: OnceCell::new(), + // + path: "../helix-syntax/languages/tree-sitter-toml".into(), + }, + ]; + + for config in configs { + // get the next id + let language_id = loader.language_configs.len(); + + for file_type in &config.file_types { + // entry().or_insert(Vec::new).push(language_id); + loader + .language_config_ids_by_file_type + .insert(file_type.clone(), language_id); + } + + loader.language_configs.push(Arc::new(config)); + } + + loader + } + + pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> { + // Find all the language configurations that match this file name + // or a suffix of the file name. 
+ let configuration_id = path + .file_name() + .and_then(|n| n.to_str()) + .and_then(|file_name| self.language_config_ids_by_file_type.get(file_name)) + .or_else(|| { + path.extension() + .and_then(|extension| extension.to_str()) + .and_then(|extension| self.language_config_ids_by_file_type.get(extension)) + }); + + configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) + + // TODO: content_regex handling conflict resolution + } +} + +// + pub struct Syntax { - grammar: Language, + // grammar: Grammar, parser: Parser, cursors: Vec<QueryCursor>, - config: HighlightConfiguration, + config: Arc<HighlightConfiguration>, root_layer: LanguageLayer, } impl Syntax { // buffer, grammar, config, grammars, sync_timeout? - pub fn new(language: LANG, source: &Rope, config: HighlightConfiguration) -> Self { + pub fn new( + /*language: LANG,*/ source: &Rope, + config: Arc<HighlightConfiguration>, + ) -> Self { // fetch grammar for parser based on language string - let grammar = get_language(&language); + // let grammar = get_language(&language); let parser = Parser::new(); let root_layer = LanguageLayer::new(); @@ -25,7 +165,7 @@ impl Syntax { // track scope_descriptor: a Vec of scopes for item in tree let mut syntax = Self { - grammar, + // grammar, parser, cursors: Vec::new(), config, @@ -48,10 +188,6 @@ impl Syntax { syntax } - pub fn configure(&mut self, scopes: &[String]) { - self.config.configure(scopes) - } - pub fn update(&mut self, source: &Rope, changeset: &ChangeSet) -> Result<(), Error> { self.root_layer .update(&mut self.parser, &self.config, source, changeset) @@ -88,9 +224,9 @@ impl Syntax { let mut cursor = QueryCursor::new(); // reuse a pool let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(self.tree()) }; let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; - let query_ref = unsafe { mem::transmute::<_, &'static mut Query>(&mut self.config.query) }; + let query_ref = unsafe { mem::transmute::<_, &'static Query>(&self.config.query) }; let config_ref = - unsafe { 
mem::transmute::<_, &'static HighlightConfiguration>(&self.config) }; + unsafe { mem::transmute::<_, &'static HighlightConfiguration>(self.config.as_ref()) }; // TODO: if reusing cursors this might need resetting if let Some(range) = &range { @@ -432,8 +568,8 @@ impl LanguageLayer { use std::sync::atomic::{AtomicUsize, Ordering}; use std::{iter, mem, ops, str, usize}; use tree_sitter::{ - Language, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, QueryMatch, - Range, Tree, + Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, + QueryMatch, Range, Tree, }; const CANCELLATION_CHECK_INTERVAL: usize = 100; @@ -462,7 +598,7 @@ pub enum HighlightEvent { /// /// This struct is immutable and can be shared between threads. pub struct HighlightConfiguration { - pub language: Language, + pub language: Grammar, pub query: Query, combined_injections_query: Option, locals_pattern_index: usize, @@ -477,16 +613,6 @@ pub struct HighlightConfiguration { local_ref_capture_index: Option, } -/// Performs syntax highlighting, recognizing a given list of highlight names. -/// -/// For the best performance `Highlighter` values should be reused between -/// syntax highlighting calls. A separate highlighter is needed for each thread that -/// is performing highlighting. -pub struct Highlighter { - parser: Parser, - cursors: Vec, -} - #[derive(Debug)] struct LocalDef<'a> { name: &'a str, @@ -527,70 +653,13 @@ struct HighlightIterLayer<'a> { depth: usize, } -impl Default for Highlighter { - fn default() -> Self { - Highlighter { - parser: Parser::new(), - cursors: Vec::new(), - } - } -} - -impl Highlighter { - pub fn new() -> Self { - Self::default() - } - - pub fn parser(&mut self) -> &mut Parser { - &mut self.parser - } - - // /// Iterate over the highlighted regions for a given slice of source code. 
- // pub fn highlight<'a>( - // &'a mut self, - // config: &'a HighlightConfiguration, - // source: &'a [u8], - // cancellation_flag: Option<&'a AtomicUsize>, - // mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, - // ) -> Result> + 'a, Error> { - // let layers = HighlightIterLayer::new( - // source, - // self, - // cancellation_flag, - // &mut injection_callback, - // config, - // 0, - // vec![Range { - // start_byte: 0, - // end_byte: usize::MAX, - // start_point: Point::new(0, 0), - // end_point: Point::new(usize::MAX, usize::MAX), - // }], - // )?; - // assert_ne!(layers.len(), 0); - // let mut result = HighlightIter { - // source, - // byte_offset: 0, - // injection_callback, - // cancellation_flag, - // highlighter: self, - // iter_count: 0, - // layers, - // next_event: None, - // last_highlight_range: None, - // }; - // result.sort_layers(); - // Ok(result) - // } -} - impl HighlightConfiguration { - /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting + /// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting /// queries. /// /// # Parameters /// - /// * `language` - The Tree-sitter `Language` that should be used for parsing. + /// * `language` - The Tree-sitter `Grammar` that should be used for parsing. /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This /// should be non-empty, otherwise no syntax highlights will be added. /// * `injections_query` - A string containing tree patterns for injecting other languages @@ -600,7 +669,7 @@ impl HighlightConfiguration { /// /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. 
pub fn new( - language: Language, + language: Grammar, highlights_query: &str, injection_query: &str, locals_query: &str, diff --git a/helix-syntax/languages/tree-sitter-toml b/helix-syntax/languages/tree-sitter-toml new file mode 160000 index 00000000..42c9ff20 --- /dev/null +++ b/helix-syntax/languages/tree-sitter-toml @@ -0,0 +1 @@ +Subproject commit 42c9ff20c0371bed7f514036e823f10793caacec diff --git a/helix-syntax/src/lib.rs b/helix-syntax/src/lib.rs index 1ca36ca6..60472fdd 100644 --- a/helix-syntax/src/lib.rs +++ b/helix-syntax/src/lib.rs @@ -82,6 +82,7 @@ mk_langs!( (Rust, tree_sitter_rust), (Scala, tree_sitter_scala), (Swift, tree_sitter_swift), + (Toml, tree_sitter_toml), (Tsx, tree_sitter_tsx), (Typescript, tree_sitter_typescript) ); diff --git a/helix-term/src/editor.rs b/helix-term/src/editor.rs index 790c3f16..24e62306 100644 --- a/helix-term/src/editor.rs +++ b/helix-term/src/editor.rs @@ -1,10 +1,5 @@ use crate::Args; -use helix_core::{ - state::coords_at_pos, - state::Mode, - syntax::{HighlightConfiguration, HighlightEvent, Highlighter}, - State, -}; +use helix_core::{state::coords_at_pos, state::Mode, syntax::HighlightEvent, State}; use helix_view::{commands, keymap, View}; use std::{ @@ -107,14 +102,18 @@ impl Editor { // TODO: cache highlight results // TODO: only recalculate when state.doc is actually modified - let highlights: Vec<_> = view - .state - .syntax - .as_mut() - .unwrap() - .highlight_iter(source_code.as_bytes(), Some(range), None, |_| None) - .unwrap() - .collect(); // TODO: we collect here to avoid double borrow, fix later + let highlights: Vec<_> = match view.state.syntax.as_mut() { + Some(syntax) => { + syntax + .highlight_iter(source_code.as_bytes(), Some(range), None, |_| None) + .unwrap() + .collect() // TODO: we collect here to avoid double borrow, fix later + } + None => vec![Ok(HighlightEvent::Source { + start: range.start, + end: range.end, + })], + }; let mut spans = Vec::new(); diff --git a/helix-view/src/view.rs 
b/helix-view/src/view.rs index 3f7a9974..0900b0ca 100644 --- a/helix-view/src/view.rs +++ b/helix-view/src/view.rs @@ -14,9 +14,8 @@ pub struct View { impl View { pub fn open(path: PathBuf, size: (u16, u16)) -> Result { - let mut state = State::load(path)?; let theme = Theme::default(); - state.syntax.as_mut().unwrap().configure(theme.scopes()); + let state = State::load(path, theme.scopes())?; let view = View { state, -- cgit v1.2.3-70-g09d2