aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPascal Kuthe2022-11-22 02:54:22 +0000
committerGitHub2022-11-22 02:54:22 +0000
commitf538b697597dccdd2492606858f0976c7cf7e4d2 (patch)
tree9e5e0709fc386462629c8ac5d832c66e8eb372ae
parent9059c65a5385f6d3cc0bc7f6e3f835ae542635a5 (diff)
significantly improve treesitter performance while editing large files (#4716)
* significantly improve treesitter performance while editing large files * Apply stylistic suggestions from code review Co-authored-by: Michael Davis <mcarsondavis@gmail.com> * use PartialEq and Hash instead of a freestanding function Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
-rw-r--r--Cargo.lock27
-rw-r--r--helix-core/Cargo.toml2
-rw-r--r--helix-core/src/syntax.rs117
3 files changed, 106 insertions, 40 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 5833714d..a2b2ffd8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -14,6 +14,18 @@ dependencies = [
]
[[package]]
+name = "ahash"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107"
+dependencies = [
+ "cfg-if",
+ "getrandom",
+ "once_cell",
+ "version_check",
+]
+
+[[package]]
name = "aho-corasick"
version = "0.7.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -400,18 +412,29 @@ version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
- "ahash",
+ "ahash 0.7.6",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038"
+dependencies = [
+ "ahash 0.8.2",
]
[[package]]
name = "helix-core"
version = "0.6.0"
dependencies = [
+ "ahash 0.8.2",
"arc-swap",
"bitflags",
"chrono",
"encoding_rs",
"etcetera",
+ "hashbrown 0.13.1",
"helix-loader",
"log",
"once_cell",
@@ -1288,7 +1311,7 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5faade31a542b8b35855fff6e8def199853b2da8da256da52f52f1316ee3137"
dependencies = [
- "hashbrown",
+ "hashbrown 0.12.3",
"regex",
]
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml
index 45272f98..eb886c90 100644
--- a/helix-core/Cargo.toml
+++ b/helix-core/Cargo.toml
@@ -30,6 +30,8 @@ once_cell = "1.16"
arc-swap = "1"
regex = "1"
bitflags = "1.3"
+ahash = "0.8.2"
+hashbrown = { version = "0.13.1", features = ["raw"] }
log = "0.4"
serde = { version = "1.0", features = ["derive"] }
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index b567a8ab..8dc34a3e 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -7,8 +7,10 @@ use crate::{
Rope, RopeSlice, Tendril,
};
+use ahash::RandomState;
use arc_swap::{ArcSwap, Guard};
use bitflags::bitflags;
+use hashbrown::raw::RawTable;
use slotmap::{DefaultKey as LayerId, HopSlotMap};
use std::{
@@ -16,7 +18,8 @@ use std::{
cell::RefCell,
collections::{HashMap, VecDeque},
fmt,
- mem::replace,
+ hash::{Hash, Hasher},
+ mem::{replace, transmute},
path::Path,
str::FromStr,
sync::Arc,
@@ -770,30 +773,38 @@ impl Syntax {
// Convert the changeset into tree sitter edits.
let edits = generate_edits(old_source, changeset);
+ // This table allows inverse indexing of `layers`.
+ // That is by hashing a `Layer` you can find
+ // the `LayerId` of an existing equivalent `Layer` in `layers`.
+ //
+ // It is used to determine if a new layer exists for an injection
+ // or if an existing layer needs to be updated.
+ let mut layers_table = RawTable::with_capacity(self.layers.len());
+ let layers_hasher = RandomState::new();
// Use the edits to update all layers markers
- if !edits.is_empty() {
- fn point_add(a: Point, b: Point) -> Point {
- if b.row > 0 {
- Point::new(a.row.saturating_add(b.row), b.column)
- } else {
- Point::new(0, a.column.saturating_add(b.column))
- }
+ fn point_add(a: Point, b: Point) -> Point {
+ if b.row > 0 {
+ Point::new(a.row.saturating_add(b.row), b.column)
+ } else {
+ Point::new(0, a.column.saturating_add(b.column))
}
- fn point_sub(a: Point, b: Point) -> Point {
- if a.row > b.row {
- Point::new(a.row.saturating_sub(b.row), a.column)
- } else {
- Point::new(0, a.column.saturating_sub(b.column))
- }
+ }
+ fn point_sub(a: Point, b: Point) -> Point {
+ if a.row > b.row {
+ Point::new(a.row.saturating_sub(b.row), a.column)
+ } else {
+ Point::new(0, a.column.saturating_sub(b.column))
}
+ }
- for layer in self.layers.values_mut() {
- // The root layer always covers the whole range (0..usize::MAX)
- if layer.depth == 0 {
- layer.flags = LayerUpdateFlags::MODIFIED;
- continue;
- }
+ for (layer_id, layer) in self.layers.iter_mut() {
+ // The root layer always covers the whole range (0..usize::MAX)
+ if layer.depth == 0 {
+ layer.flags = LayerUpdateFlags::MODIFIED;
+ continue;
+ }
+ if !edits.is_empty() {
for range in &mut layer.ranges {
// Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
for edit in edits.iter().rev() {
@@ -858,6 +869,12 @@ impl Syntax {
}
}
}
+
+ let hash = layers_hasher.hash_one(layer);
+ // Safety: insert_no_grow is unsafe because it assumes that the table
+ // has enough capacity to hold additional elements.
+ // This is always the case as we reserved enough capacity above.
+ unsafe { layers_table.insert_no_grow(hash, layer_id) };
}
PARSER.with(|ts_parser| {
@@ -982,27 +999,23 @@ impl Syntax {
let depth = layer.depth + 1;
// TODO: can't inline this since matches borrows self.layers
for (config, ranges) in injections {
- // Find an existing layer
- let layer = self
- .layers
- .iter_mut()
- .find(|(_, layer)| {
- layer.depth == depth && // TODO: track parent id instead
- layer.config.language == config.language && layer.ranges == ranges
+ let new_layer = LanguageLayer {
+ tree: None,
+ config,
+ depth,
+ ranges,
+ flags: LayerUpdateFlags::empty(),
+ };
+
+ // Find an identical existing layer
+ let layer = layers_table
+ .get(layers_hasher.hash_one(&new_layer), |&it| {
+ self.layers[it] == new_layer
})
- .map(|(id, _layer)| id);
+ .copied();
// ...or insert a new one.
- let layer_id = layer.unwrap_or_else(|| {
- self.layers.insert(LanguageLayer {
- tree: None,
- config,
- depth,
- ranges,
- // set the modified flag to ensure the layer is parsed
- flags: LayerUpdateFlags::empty(),
- })
- });
+ let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
queue.push_back(layer_id);
}
@@ -1139,6 +1152,34 @@ pub struct LanguageLayer {
flags: LayerUpdateFlags,
}
+/// This PartialEq implementation only checks if that
+/// two layers are theoretically identical (meaning they highlight the same text range with the same language).
+/// It does not check whether the layers have the same internal treesitter
+/// state.
+impl PartialEq for LanguageLayer {
+ fn eq(&self, other: &Self) -> bool {
+ self.depth == other.depth
+ && self.config.language == other.config.language
+ && self.ranges == other.ranges
+ }
+}
+
+/// Hash implementation belongs to PartialEq implementation above.
+/// See its documentation for details.
+impl Hash for LanguageLayer {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.depth.hash(state);
+ // The transmute is necessary here because tree_sitter::Language does not derive Hash at the moment.
+ // However it does use #[repr] transparent so the transmute here is safe
+ // as `Language` (which `Grammar` is an alias for) is just a newtype wrapper around a (thin) pointer.
+ // This is also compatible with the PartialEq implementation of language
+ // as that is just a pointer comparison.
+ let language: *const () = unsafe { transmute(self.config.language) };
+ language.hash(state);
+ self.ranges.hash(state);
+ }
+}
+
impl LanguageLayer {
pub fn tree(&self) -> &Tree {
// TODO: no unwrap