about | summary | refs | log | tree | commit | diff
path: root/helix-core/src/syntax.rs
diff options
context:
space:
mode:
author: Blaž Hrastnik, 2020-09-17 05:57:49 +0000
committer: Blaž Hrastnik, 2020-09-17 07:32:40 +0000
commit: 088f8a82af1b90e422c495cde92b537dedb1e419 (patch)
tree: c0607eb3f08e4cc782aa4f627046f439820425b5 /helix-core/src/syntax.rs
parent: 31999d6528b247e223774d1a4edec7966dda3d9e (diff)
Incremental parsing: rough draft.
Diffstat (limited to 'helix-core/src/syntax.rs')
-rw-r--r-- helix-core/src/syntax.rs 634
1 files changed, 536 insertions, 98 deletions
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index b661efad..c0b67f5d 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -1,66 +1,397 @@
-// pub struct Syntax {
-// parser: Parser,
-// }
+use crate::{Change, Rope, RopeSlice, Transaction};
+pub use helix_syntax::LANG;
+pub use helix_syntax::{get_language, get_language_name};
+
+/// Syntax state for one document: the parser, the highlight configuration and
+/// the root language layer that owns the parsed tree.
+pub struct Syntax {
+ // Grammar returned by `get_language` for the language passed to `new`.
+ grammar: Language,
+ // Parser handed to the root layer for the initial parse and for updates.
+ parser: Parser,
+ // Starts out empty in `new`; presumably meant as a pool of reusable query
+ // cursors (see the "reuse a pool" note in `highlight_iter`) -- TODO confirm.
+ cursors: Vec<QueryCursor>,
+
+ // Highlight configuration; supplies the language set on the parser and the
+ // query run by `highlight_iter`.
+ config: HighlightConfiguration,
-//impl Syntax {
-// // buffer, grammar, config, grammars, sync_timeout?
-// pub fn new() -> Self {
-// unimplemented!()
-// // make a new root layer
-// // track markers of injections
-// //
-// // track scope_descriptor: a Vec of scopes for item in tree
-// //
-// // fetch grammar for parser based on language string
-// // update root layer
-// }
-
-// // fn buffer_changed -> call layer.update(range, new_text) on root layer and then all marker layers
-
-// // call this on transaction.apply() -> buffer_changed(changes)
-// //
-// // fn parse(language, old_tree, ranges)
-// //
-// // fn tree() -> Tree
-// //
-// // <!--update_for_injection(grammar)-->
-
-// // Highlighting
-// // fn highlight_iter() -> iterates over all the scopes
-// // on_tokenize
-// // on_change_highlighting
-
-// // Commenting
-// // comment_strings_for_pos
-// // is_commented
-
-// // Indentation
-// // suggested_indent_for_line_at_buffer_row
-// // suggested_indent_for_buffer_row
-// // indent_level_for_line
-
-// // TODO: Folding
-
-// // Syntax APIs
-// // get_syntax_node_containing_range ->
-// // ...
-// // get_syntax_node_at_pos
-// // buffer_range_for_scope_at_pos
-//}
+ // Layer covering the whole document; it holds the tree returned by `tree()`.
+ root_layer: LanguageLayer,
+}
+
+impl Syntax {
+ // buffer, grammar, config, grammars, sync_timeout?
+    /// Creates the syntax state for `source` and performs the first parse of the
+    /// root layer over the whole document.
+    ///
+    /// `language` selects the grammar through `get_language`; `config` is stored
+    /// and used for parsing and highlighting.
+    pub fn new(language: LANG, source: &Rope, config: HighlightConfiguration) -> Self {
+        // TODO: track markers of injections
+        // TODO: track scope_descriptor: a Vec of scopes for item in tree
+        let mut this = Self {
+            // fetch grammar for parser based on language string
+            grammar: get_language(&language),
+            parser: Parser::new(),
+            cursors: Vec::new(),
+            config,
+            root_layer: LanguageLayer::new(),
+        };
+
+        // The root layer is not restricted to a sub-range of the document.
+        let whole_document = Range {
+            start_byte: 0,
+            end_byte: usize::MAX,
+            start_point: Point::new(0, 0),
+            end_point: Point::new(usize::MAX, usize::MAX),
+        };
+
+        // update root layer
+        // NOTE(review): the `Result` of this parse is discarded; a failed parse
+        // leaves the root layer without a tree.
+        this.root_layer.parse(
+            &mut this.parser,
+            &this.config,
+            source,
+            0,
+            vec![whole_document],
+        );
+        this
+    }
+
+    /// Passes `scopes` on to the stored highlight configuration's `configure`.
+    pub fn configure(&mut self, scopes: &[String]) {
+        let config = &mut self.config;
+        config.configure(scopes);
+    }
+
+    /// Forwards a document change to the root layer, which edits its tree and
+    /// reparses `source`. Returns whatever the root layer's update returns.
+    pub fn update(&mut self, source: &Rope, changeset: &ChangeSet) -> Result<(), Error> {
+        // TODO: deal with injections and update them too
+        let Self {
+            root_layer,
+            parser,
+            config,
+            ..
+        } = self;
+        root_layer.update(parser, config, source, changeset)
+    }
+
+ // fn buffer_changed -> call layer.update(range, new_text) on root layer and then all marker layers
+
+ // call this on transaction.apply() -> buffer_changed(changes)
+ //
+ // fn parse(language, old_tree, ranges)
+ //
+    /// Returns the tree held by the root layer.
+    ///
+    /// Panics if the root layer has no tree.
+    fn tree(&self) -> &Tree {
+        let root = &self.root_layer;
+        root.tree()
+    }
+ //
+ // <!--update_for_injection(grammar)-->
+
+ // Highlighting
+
+ /// Iterate over the highlighted regions for a given slice of source code.
+ ///
+ /// Instead of parsing, this builds the root highlight layer by hand on top of
+ /// the tree already stored in the root layer and runs the configuration's
+ /// query over it.
+ ///
+ /// * `source` - bytes of the document; node byte ranges are used to index it.
+ /// * `cancellation_flag` - stored in the returned iterator.
+ /// * `injection_callback` - stored in the returned iterator.
+ ///
+ /// In the code visible here the function always returns `Ok`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the root layer has no tree (`self.tree()` unwraps it).
+ pub fn highlight_iter<'a>(
+ &'a mut self,
+ source: &'a [u8],
+ cancellation_flag: Option<&'a AtomicUsize>,
+ // NOTE(review): `mut` looks unnecessary here; the callback is only moved
+ // into the iterator below.
+ mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
+ ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
+ // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
+ // prevents them from being moved. But both of these values are really just
+ // pointers, so it's actually ok to move them.
+
+ // NOTE(review): each transmute below stretches a borrow to `'static` to
+ // get around the borrow checker. `cursor` is later moved into `layer`
+ // while `captures` was created from `cursor_ref`, and `self` is handed to
+ // the iterator while `tree_ref`/`query_ref`/`config_ref` still point into
+ // it. Soundness rests on the "just pointers" assumption above -- confirm.
+ let mut cursor = QueryCursor::new(); // reuse a pool
+ let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(self.tree()) };
+ let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
+ let query_ref = unsafe { mem::transmute::<_, &'static mut Query>(&mut self.config.query) };
+ let config_ref =
+ unsafe { mem::transmute::<_, &'static HighlightConfiguration>(&self.config) };
+ // The text callback hands the query the source bytes covered by a node.
+ let captures = cursor_ref
+ .captures(query_ref, tree_ref.root_node(), move |n: Node| {
+ &source[n.byte_range()]
+ })
+ .peekable();
+
+ // manually craft the root layer based on the existing tree
+ let layer = HighlightIterLayer {
+ highlight_end_stack: Vec::new(),
+ scope_stack: vec![LocalScope {
+ inherits: false,
+ range: 0..usize::MAX,
+ local_defs: Vec::new(),
+ }],
+ cursor,
+ depth: 0,
+ // The tree stays owned by the root layer, so the layer keeps none itself.
+ _tree: None,
+ captures,
+ config: config_ref,
+ // A single range spanning the whole document.
+ ranges: vec![Range {
+ start_byte: 0,
+ end_byte: usize::MAX,
+ start_point: Point::new(0, 0),
+ end_point: Point::new(usize::MAX, usize::MAX),
+ }],
+ };
+
+ let mut result = HighlightIter {
+ source,
+ byte_offset: 0,
+ injection_callback,
+ cancellation_flag,
+ highlighter: self,
+ iter_count: 0,
+ layers: vec![layer],
+ next_event: None,
+ last_highlight_range: None,
+ };
+ result.sort_layers();
+ Ok(result)
+ }
+ // on_tokenize
+ // on_change_highlighting
+
+ // Commenting
+ // comment_strings_for_pos
+ // is_commented
+
+ // Indentation
+ // suggested_indent_for_line_at_buffer_row
+ // suggested_indent_for_buffer_row
+ // indent_level_for_line
+
+ // TODO: Folding
+
+ // Syntax APIs
+ // get_syntax_node_containing_range ->
+ // ...
+ // get_syntax_node_at_pos
+ // buffer_range_for_scope_at_pos
+}
+/// One parsed layer of a document. Only the tree is stored so far; the other
+/// planned fields are still listed as comments.
pub struct LanguageLayer {
// mode
-// grammar
-// depth
-// tree: Tree,
+ // grammar
+ // depth
+ // `None` until `parse` succeeds, then the most recently parsed tree.
+ tree: Option<Tree>,
}
-// impl LanguageLayer {
-// // fn highlight_iter() -> same as Mode but for this layer. Mode composits these
-// // fn buffer_changed
-// // fn update(range)
-// // fn update_injections()
-// }
+use crate::state::{coords_at_pos, Coords};
+use crate::transaction::{ChangeSet, Operation};
+use crate::Tendril;
+
+impl LanguageLayer {
+    /// Creates a layer that has not been parsed yet and therefore holds no tree.
+    pub fn new() -> Self {
+        let tree = None;
+        Self { tree }
+    }
+
+    /// Borrows the layer's tree.
+    ///
+    /// Panics if the layer holds no tree yet.
+    fn tree(&self) -> &Tree {
+        // TODO: no unwrap
+        let parsed: Option<&Tree> = self.tree.as_ref();
+        parsed.unwrap()
+    }
+
+ /// Parses `source` with `parser` and stores the resulting tree in this layer.
+ ///
+ /// The parser is restricted to `ranges` and switched to `config.language`.
+ /// The layer's current tree, if any, is passed to the parser as the old tree.
+ ///
+ /// If the parser rejects `ranges`, nothing is parsed, the stored tree is left
+ /// untouched and `Ok(())` is returned.
+ ///
+ /// # Errors
+ ///
+ /// * `Error::InvalidLanguage` when the parser refuses `config.language`.
+ /// * `Error::Cancelled` when the parser returns no tree.
+ ///
+ /// NOTE(review): `depth` is only referenced by the commented-out injection
+ /// code below, and the `'a` lifetime and the `mut` on `depth`/`ranges` are
+ /// unused in the live code.
+ fn parse<'a>(
+ &mut self,
+ parser: &mut Parser,
+ config: &HighlightConfiguration,
+ source: &Rope,
+ mut depth: usize,
+ mut ranges: Vec<Range>,
+ ) -> Result<(), Error> {
+ if parser.set_included_ranges(&ranges).is_ok() {
+ parser
+ .set_language(config.language)
+ .map_err(|_| Error::InvalidLanguage)?;
+
+ // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
+ let tree = parser
+ .parse_with(
+ // Text callback: hand the parser the rest of the rope chunk that
+ // contains `byte`, or an empty slice once past the end of the text.
+ &mut |byte, _| {
+ if byte <= source.len_bytes() {
+ let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
+ chunk[byte - start_byte..].as_bytes()
+ } else {
+ // out of range
+ &[]
+ }
+ },
+ self.tree.as_ref(),
+ )
+ .ok_or(Error::Cancelled)?;
+ // unsafe { syntax.parser.set_cancellation_flag(None) };
+ // let mut cursor = syntax.cursors.pop().unwrap_or_else(QueryCursor::new);
+
+ // Process combined injections. (ERB, EJS, etc https://github.com/tree-sitter/tree-sitter/pull/526)
+ // if let Some(combined_injections_query) = &config.combined_injections_query {
+ // let mut injections_by_pattern_index =
+ // vec![(None, Vec::new(), false); combined_injections_query.pattern_count()];
+ // let matches =
+ // cursor.matches(combined_injections_query, tree.root_node(), |n: Node| {
+ // &source[n.byte_range()]
+ // });
+ // for mat in matches {
+ // let entry = &mut injections_by_pattern_index[mat.pattern_index];
+ // let (language_name, content_node, include_children) =
+ // injection_for_match(config, combined_injections_query, &mat, source);
+ // if language_name.is_some() {
+ // entry.0 = language_name;
+ // }
+ // if let Some(content_node) = content_node {
+ // entry.1.push(content_node);
+ // }
+ // entry.2 = include_children;
+ // }
+ // for (lang_name, content_nodes, includes_children) in injections_by_pattern_index {
+ // if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
+ // if let Some(next_config) = (injection_callback)(lang_name) {
+ // let ranges =
+ // Self::intersect_ranges(&ranges, &content_nodes, includes_children);
+ // if !ranges.is_empty() {
+ // queue.push((next_config, depth + 1, ranges));
+ // }
+ // }
+ // }
+ // }
+ // }
+ // Replace whatever tree was stored before with the fresh one.
+ self.tree = Some(tree)
+ }
+ Ok(())
+ }
+
+    /// Translates `changeset` into the `InputEdit`s tree-sitter needs to adjust a tree.
+    ///
+    /// `text` is the document the change set is applied *to*: the operations are
+    /// walked with a char cursor into `text`, and each edit position is converted
+    /// to a byte offset and a `Point` (row, byte column within the line).
+    ///
+    /// * `Retain(n)` only advances the cursor and produces no edit.
+    /// * `Delete(n)` directly followed by `Insert(s)` becomes one replacement edit.
+    /// * A lone `Delete(n)` becomes a deletion, a lone `Insert(s)` an insertion.
+    ///
+    /// Every returned edit is expressed in coordinates of the original `text`;
+    /// earlier edits do not shift the positions of later ones. When several edits
+    /// are applied to the same tree they therefore have to be applied back to
+    /// front (or be re-based by the caller).
+    pub(crate) fn generate_edits(
+        text: &RopeSlice,
+        changeset: &ChangeSet,
+    ) -> Vec<tree_sitter::InputEdit> {
+        use Operation::*;
+
+        // TODO: this is a lot easier with Change instead of Operation.
+
+        /// Converts a char index into its byte offset and its (row, byte column) point.
+        fn point_at_pos(text: &RopeSlice, pos: usize) -> (usize, Point) {
+            let byte = text.char_to_byte(pos);
+            let line = text.char_to_line(pos);
+            let line_start_byte = text.line_to_byte(line);
+            let col = byte - line_start_byte;
+
+            (byte, Point::new(line, col))
+        }
+
+        /// Returns the point reached after writing `text` starting at `point`:
+        /// every `\n` starts a new row, every other byte advances the column.
+        fn traverse(point: Point, text: &Tendril) -> Point {
+            let Point {
+                mut row,
+                mut column,
+            } = point;
+
+            // TODO: there should be a better way here
+            for ch in text.bytes() {
+                if ch == b'\n' {
+                    row += 1;
+                    column = 0;
+                } else {
+                    column += 1;
+                }
+            }
+            Point { row, column }
+        }
+
+        // Char cursor into the original text.
+        let mut old_pos = 0;
+        let mut edits = Vec::new();
+        let mut iter = changeset.changes.iter().peekable();
+
+        // `while let` rather than `for`: a delete has to peek at (and possibly
+        // consume) the operation that follows it.
+        while let Some(change) = iter.next() {
+            match change {
+                Retain(len) => old_pos += *len,
+                Delete(len) => {
+                    let old_end = old_pos + *len;
+                    let (start_byte, start_position) = point_at_pos(text, old_pos);
+                    let (old_end_byte, old_end_position) = point_at_pos(text, old_end);
+
+                    // a subsequent ins means a replace, consume it
+                    let (new_end_byte, new_end_position) =
+                        if let Some(Insert(s)) = iter.peek() {
+                            let replaced = (start_byte + s.len(), traverse(start_position, s));
+                            iter.next();
+                            replaced
+                        } else {
+                            // plain deletion: the new text ends where it starts
+                            (start_byte, start_position)
+                        };
+
+                    edits.push(tree_sitter::InputEdit {
+                        start_byte,
+                        old_end_byte,
+                        new_end_byte,
+                        start_position,
+                        old_end_position,
+                        new_end_position,
+                    });
+
+                    old_pos = old_end;
+                }
+                Insert(s) => {
+                    // An insertion covers no original text, so the cursor stays put.
+                    let (start_byte, start_position) = point_at_pos(text, old_pos);
+
+                    edits.push(tree_sitter::InputEdit {
+                        start_byte,
+                        old_end_byte: start_byte,
+                        new_end_byte: start_byte + s.len(),
+                        start_position,
+                        old_end_position: start_position,
+                        new_end_position: traverse(start_position, s),
+                    });
+                }
+            }
+        }
+        edits
+    }
+
+    /// Tells the stored tree about `changeset` and reparses `source`.
+    ///
+    /// An empty change set is a no-op. Otherwise the change set is converted to
+    /// tree-sitter input edits, the edits are applied to the current tree, and the
+    /// layer is parsed again over the whole document with the edited tree as the
+    /// old tree.
+    ///
+    /// If the layer has no tree yet there is nothing to edit, and the parse simply
+    /// builds one from scratch.
+    ///
+    /// NOTE(review): the same `source` is used both to compute the edit positions
+    /// (which index the text the change set applies to) and as the text that is
+    /// parsed. Confirm which version of the document callers pass in.
+    ///
+    /// # Errors
+    ///
+    /// Propagates the errors of `parse`.
+    fn update(
+        &mut self,
+        parser: &mut Parser,
+        config: &HighlightConfiguration,
+        source: &Rope,
+        changeset: &ChangeSet,
+    ) -> Result<(), Error> {
+        if changeset.is_empty() {
+            return Ok(());
+        }
+
+        // Notify the tree about all the changes
+        if let Some(tree) = self.tree.as_mut() {
+            let edits = Self::generate_edits(&source.slice(..), changeset);
+
+            // The edits are all expressed in coordinates of the original text.
+            // Applying them back to front keeps every remaining edit valid;
+            // applied front to back, the first edit would shift the tree
+            // underneath the positions of the ones after it.
+            for edit in edits.iter().rev() {
+                tree.edit(edit);
+            }
+        }
+
+        self.parse(
+            parser,
+            config,
+            source,
+            0,
+            // TODO: what to do about this range on update
+            vec![Range {
+                start_byte: 0,
+                end_byte: usize::MAX,
+                start_point: Point::new(0, 0),
+                end_point: Point::new(usize::MAX, usize::MAX),
+            }],
+        )
+    }
+
+ // fn highlight_iter() -> same as Mode but for this layer. Mode composites these
+ // fn buffer_changed
+ // fn update(range)
+ // fn update_injections()
+}
// -- refactored from tree-sitter-highlight to be able to retain state
// TODO: add seek() to iter
@@ -169,7 +500,7 @@ where
{
source: &'a [u8],
byte_offset: usize,
- highlighter: &'a mut Highlighter,
+ highlighter: &'a mut Syntax,
injection_callback: F,
cancellation_flag: Option<&'a AtomicUsize>,
layers: Vec<HighlightIterLayer<'a>>,
@@ -179,7 +510,7 @@ where
}
struct HighlightIterLayer<'a> {
- _tree: Tree,
+ _tree: Option<Tree>,
cursor: QueryCursor,
captures: iter::Peekable<QueryCaptures<'a, &'a [u8]>>,
config: &'a HighlightConfiguration,
@@ -207,43 +538,43 @@ impl Highlighter {
&mut self.parser
}
- /// Iterate over the highlighted regions for a given slice of source code.
- pub fn highlight<'a>(
- &'a mut self,
- config: &'a HighlightConfiguration,
- source: &'a [u8],
- cancellation_flag: Option<&'a AtomicUsize>,
- mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
- ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
- let layers = HighlightIterLayer::new(
- source,
- self,
- cancellation_flag,
- &mut injection_callback,
- config,
- 0,
- vec![Range {
- start_byte: 0,
- end_byte: usize::MAX,
- start_point: Point::new(0, 0),
- end_point: Point::new(usize::MAX, usize::MAX),
- }],
- )?;
- assert_ne!(layers.len(), 0);
- let mut result = HighlightIter {
- source,
- byte_offset: 0,
- injection_callback,
- cancellation_flag,
- highlighter: self,
- iter_count: 0,
- layers,
- next_event: None,
- last_highlight_range: None,
- };
- result.sort_layers();
- Ok(result)
- }
+ // /// Iterate over the highlighted regions for a given slice of source code.
+ // pub fn highlight<'a>(
+ // &'a mut self,
+ // config: &'a HighlightConfiguration,
+ // source: &'a [u8],
+ // cancellation_flag: Option<&'a AtomicUsize>,
+ // mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
+ // ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
+ // let layers = HighlightIterLayer::new(
+ // source,
+ // self,
+ // cancellation_flag,
+ // &mut injection_callback,
+ // config,
+ // 0,
+ // vec![Range {
+ // start_byte: 0,
+ // end_byte: usize::MAX,
+ // start_point: Point::new(0, 0),
+ // end_point: Point::new(usize::MAX, usize::MAX),
+ // }],
+ // )?;
+ // assert_ne!(layers.len(), 0);
+ // let mut result = HighlightIter {
+ // source,
+ // byte_offset: 0,
+ // injection_callback,
+ // cancellation_flag,
+ // highlighter: self,
+ // iter_count: 0,
+ // layers,
+ // next_event: None,
+ // last_highlight_range: None,
+ // };
+ // result.sort_layers();
+ // Ok(result)
+ // }
}
impl HighlightConfiguration {
@@ -413,7 +744,7 @@ impl<'a> HighlightIterLayer<'a> {
/// added to the returned vector.
fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
source: &'a [u8],
- highlighter: &mut Highlighter,
+ highlighter: &mut Syntax,
cancellation_flag: Option<&'a AtomicUsize>,
injection_callback: &mut F,
mut config: &'a HighlightConfiguration,
@@ -423,6 +754,8 @@ impl<'a> HighlightIterLayer<'a> {
let mut result = Vec::with_capacity(1);
let mut queue = Vec::new();
loop {
+ // --> Tree parsing part
+
if highlighter.parser.set_included_ranges(&ranges).is_ok() {
highlighter
.parser
@@ -474,6 +807,8 @@ impl<'a> HighlightIterLayer<'a> {
}
}
+ // --> Highlighting query part
+
// The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
// prevents them from being moved. But both of these values are really just
// pointers, so it's actually ok to move them.
@@ -495,7 +830,7 @@ impl<'a> HighlightIterLayer<'a> {
}],
cursor,
depth,
- _tree: tree,
+ _tree: Some(tree),
captures,
config,
ranges,
@@ -1016,3 +1351,106 @@ fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
}
vec.clear();
}
+
+/// Parses a tiny Rust snippet through `Syntax::new` and checks the shape of the
+/// resulting tree.
+#[test]
+fn test_parser() {
+    let highlight_names: Vec<String> = [
+        "attribute",
+        "constant",
+        "function.builtin",
+        "function",
+        "keyword",
+        "operator",
+        "property",
+        "punctuation",
+        "punctuation.bracket",
+        "punctuation.delimiter",
+        "string",
+        "string.special",
+        "tag",
+        "type",
+        "type.builtin",
+        "variable",
+        "variable.builtin",
+        "variable.parameter",
+    ]
+    .iter()
+    .map(|name| String::from(*name))
+    .collect();
+
+    let language = get_language(&LANG::Rust);
+
+    // The query files are read from the grammar checkout next to this crate.
+    let highlights_query = std::fs::read_to_string(
+        "../helix-syntax/languages/tree-sitter-rust/queries/highlights.scm",
+    )
+    .unwrap();
+    let injections_query = std::fs::read_to_string(
+        "../helix-syntax/languages/tree-sitter-rust/queries/injections.scm",
+    )
+    .unwrap();
+
+    let mut config = HighlightConfiguration::new(
+        language,
+        &highlights_query,
+        &injections_query,
+        "", // locals.scm
+    )
+    .unwrap();
+    config.configure(&highlight_names);
+
+    let source = Rope::from_str(
+ "
+ struct Stuff {}
+ fn main() {}
+ ",
+    );
+    let syntax = Syntax::new(LANG::Rust, &source, config);
+    let tree = syntax.root_layer.tree.unwrap();
+    let root = tree.root_node();
+    assert_eq!(root.kind(), "source_file");
+
+    let expected_sexp = concat!(
+        "(source_file ",
+        "(struct_item name: (type_identifier) body: (field_declaration_list)) ",
+        "(function_item name: (identifier) parameters: (parameters) body: (block)))"
+    );
+    assert_eq!(root.to_sexp(), expected_sexp);
+
+    let first_item = root.child(0).unwrap();
+    assert_eq!(first_item.kind(), "struct_item");
+}
+
+/// Checks the input edits generated for a replacement followed by a deletion
+/// that spans a line break.
+#[test]
+fn test_input_edits() {
+    use crate::State;
+    use tree_sitter::InputEdit;
+
+    let mut state = State::new("hello world!\ntest 123".into());
+    // Replace chars 6..11 with "test", then delete chars 12..17.
+    let transaction = Transaction::change(
+        &state,
+        vec![(6, 11, Some("test".into())), (12, 17, None)].into_iter(),
+    );
+    let doc = state.doc.slice(..);
+    let edits = LanguageLayer::generate_edits(&doc, &transaction.changes);
+    // transaction.apply(&mut state);
+
+    let replacement = InputEdit {
+        start_byte: 6,
+        old_end_byte: 11,
+        new_end_byte: 10,
+        start_position: Point { row: 0, column: 6 },
+        old_end_position: Point { row: 0, column: 11 },
+        new_end_position: Point { row: 0, column: 10 },
+    };
+    let deletion = InputEdit {
+        start_byte: 12,
+        old_end_byte: 17,
+        new_end_byte: 12,
+        start_position: Point { row: 0, column: 12 },
+        old_end_position: Point { row: 1, column: 4 },
+        new_end_position: Point { row: 0, column: 12 },
+    };
+
+    assert_eq!(edits, &[replacement, deletion]);
+}