aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBlaž Hrastnik2021-07-11 10:36:45 +0000
committerBlaž Hrastnik2021-07-14 01:00:05 +0000
commitdd2903ff10387c04e933aa37846663131297b8b3 (patch)
tree97a6d2d98e8cf350e1e32cad28c7a83483649be9
parentdd5e8082e410032c9782835cf0fc52a469d050b1 (diff)
Dynamically load grammar libraries at runtime
-rw-r--r--.gitignore1
-rw-r--r--Cargo.lock25
-rw-r--r--helix-core/src/indent.rs4
-rw-r--r--helix-core/src/syntax.rs12
-rw-r--r--helix-syntax/Cargo.toml6
-rw-r--r--helix-syntax/build.rs204
-rw-r--r--helix-syntax/src/lib.rs115
-rw-r--r--runtime/grammars/.gitkeep0
8 files changed, 201 insertions, 166 deletions
diff --git a/.gitignore b/.gitignore
index 1a42b440..346d0946 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ target
helix-term/rustfmt.toml
helix-syntax/languages/
result
+runtime/grammars
diff --git a/Cargo.lock b/Cargo.lock
index 222751df..e262f081 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -61,9 +61,6 @@ name = "cc"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2"
-dependencies = [
- "jobserver",
-]
[[package]]
name = "cfg-if"
@@ -354,8 +351,9 @@ dependencies = [
name = "helix-syntax"
version = "0.3.0"
dependencies = [
+ "anyhow",
"cc",
- "serde",
+ "libloading",
"threadpool",
"tree-sitter",
]
@@ -476,15 +474,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
[[package]]
-name = "jobserver"
-version = "0.1.22"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "972f5ae5d1cb9c6ae417789196c803205313edde988685da5e3aae0827b9e7fd"
-dependencies = [
- "libc",
-]
-
-[[package]]
name = "jsonrpc-core"
version = "17.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -510,6 +499,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12b8adadd720df158f4d70dfe7ccc6adb0472d7c55ca83445f6a5ab3e36f8fb6"
[[package]]
+name = "libloading"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a"
+dependencies = [
+ "cfg-if 1.0.0",
+ "winapi",
+]
+
+[[package]]
name = "lock_api"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs
index 81bdffc0..1b36db7b 100644
--- a/helix-core/src/indent.rs
+++ b/helix-core/src/indent.rs
@@ -253,14 +253,14 @@ where
let doc = Rope::from(doc);
use crate::syntax::{
- Configuration, IndentationConfiguration, Lang, LanguageConfiguration, Loader,
+ Configuration, IndentationConfiguration, LanguageConfiguration, Loader,
};
use once_cell::sync::OnceCell;
let loader = Loader::new(Configuration {
language: vec![LanguageConfiguration {
scope: "source.rust".to_string(),
file_types: vec!["rs".to_string()],
- language_id: Lang::Rust,
+ language_id: "Rust".to_string(),
highlight_config: OnceCell::new(),
//
roots: vec![],
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index 833ccfb9..f249f5fe 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -5,7 +5,7 @@ use crate::{
Rope, RopeSlice, Tendril,
};
-pub use helix_syntax::{get_language, get_language_name, Lang};
+pub use helix_syntax::get_language;
use arc_swap::ArcSwap;
@@ -31,7 +31,7 @@ pub struct Configuration {
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfiguration {
#[serde(rename = "name")]
- pub(crate) language_id: Lang,
+ pub(crate) language_id: String,
pub scope: String, // source.rust
pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
@@ -153,7 +153,7 @@ fn read_query(language: &str, filename: &str) -> String {
impl LanguageConfiguration {
fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
- let language = get_language_name(self.language_id).to_ascii_lowercase();
+ let language = self.language_id.to_ascii_lowercase();
let highlights_query = read_query(&language, "highlights.scm");
// always highlight syntax errors
@@ -166,7 +166,7 @@ impl LanguageConfiguration {
if highlights_query.is_empty() {
None
} else {
- let language = get_language(self.language_id);
+ let language = get_language(&crate::RUNTIME_DIR, &self.language_id).ok()?;
let config = HighlightConfiguration::new(
language,
&highlights_query,
@@ -198,7 +198,7 @@ impl LanguageConfiguration {
pub fn indent_query(&self) -> Option<&IndentQuery> {
self.indent_query
.get_or_init(|| {
- let language = get_language_name(self.language_id).to_ascii_lowercase();
+ let language = self.language_id.to_ascii_lowercase();
let toml = load_runtime_file(&language, "indents.toml").ok()?;
toml::from_slice(toml.as_bytes()).ok()
@@ -1802,7 +1802,7 @@ mod test {
.map(String::from)
.collect();
- let language = get_language(Lang::Rust);
+ let language = get_language(&crate::RUNTIME_DIR, "Rust").unwrap();
let config = HighlightConfiguration::new(
language,
&std::fs::read_to_string(
diff --git a/helix-syntax/Cargo.toml b/helix-syntax/Cargo.toml
index 140e3d24..7ad24488 100644
--- a/helix-syntax/Cargo.toml
+++ b/helix-syntax/Cargo.toml
@@ -12,8 +12,10 @@ include = ["src/**/*", "languages/**/*", "build.rs", "!**/docs/**/*", "!**/test/
[dependencies]
tree-sitter = "0.19"
-serde = { version = "1.0", features = ["derive"] }
+libloading = "0.7"
+anyhow = "1"
[build-dependencies]
-cc = { version = "1", features = ["parallel"] }
+cc = { version = "1" }
threadpool = { version = "1.0" }
+anyhow = "1"
diff --git a/helix-syntax/build.rs b/helix-syntax/build.rs
index 847f8a67..02a5bf49 100644
--- a/helix-syntax/build.rs
+++ b/helix-syntax/build.rs
@@ -1,5 +1,7 @@
+use anyhow::Result;
use std::fs;
use std::path::PathBuf;
+use std::time::SystemTime;
use std::sync::mpsc::channel;
@@ -15,66 +17,156 @@ fn collect_tree_sitter_dirs(ignore: &[String]) -> Vec<String> {
dirs
}
-fn collect_src_files(dir: &str) -> (Vec<String>, Vec<String>) {
- eprintln!("Collect files for {}", dir);
+#[cfg(unix)]
+const DYLIB_EXTENSION: &str = "so";
- let mut c_files = Vec::new();
- let mut cpp_files = Vec::new();
- let path = PathBuf::from("languages").join(&dir).join("src");
- for entry in fs::read_dir(path).unwrap().flatten() {
- let path = entry.path();
- if path
- .file_stem()
- .unwrap()
- .to_str()
- .unwrap()
- .starts_with("binding")
- {
- continue;
+#[cfg(windows)]
+const DYLIB_EXTENSION: &str = "dll";
+
+// const BUILD_TARGET: &'static str = env!("BUILD_TARGET");
+
+use anyhow::{anyhow, Context};
+use std::{path::Path, process::Command};
+
+fn build_library(src_path: &Path, language: &str) -> Result<()> {
+ let header_path = src_path;
+ // let grammar_path = src_path.join("grammar.json");
+ let parser_path = src_path.join("parser.c");
+ let mut scanner_path = src_path.join("scanner.c");
+
+ let scanner_path = if scanner_path.exists() {
+ Some(scanner_path)
+ } else {
+ scanner_path.set_extension("cc");
+ if scanner_path.exists() {
+ Some(scanner_path)
+ } else {
+ None
+ }
+ };
+ let parser_lib_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../runtime/grammars");
+ let mut library_path = parser_lib_path.join(language);
+ library_path.set_extension(DYLIB_EXTENSION);
+
+ let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
+ .with_context(|| "Failed to compare source and binary timestamps")?;
+
+ if !recompile {
+ return Ok(());
+ }
+
+ let mut config = cc::Build::new();
+ config.cpp(true).opt_level(2).cargo_metadata(false);
+ // .target(BUILD_TARGET)
+ // .host(BUILD_TARGET);
+ let compiler = config.get_compiler();
+ let mut command = Command::new(compiler.path());
+ for (key, value) in compiler.env() {
+ command.env(key, value);
+ }
+
+ if cfg!(windows) {
+ command
+ .args(&["/nologo", "/LD", "/I"])
+ .arg(header_path)
+ .arg("/Od")
+ .arg(parser_path);
+ if let Some(scanner_path) = scanner_path.as_ref() {
+ command.arg(scanner_path);
}
- if let Some(ext) = path.extension() {
- if ext == "c" {
- c_files.push(path.to_str().unwrap().to_string());
- } else if ext == "cc" || ext == "cpp" || ext == "cxx" {
- cpp_files.push(path.to_str().unwrap().to_string());
+ command
+ .arg("/link")
+ .arg(format!("/out:{}", library_path.to_str().unwrap()));
+ } else {
+ command
+ .arg("-shared")
+ .arg("-fPIC")
+ .arg("-fno-exceptions")
+ .arg("-g")
+ .arg("-I")
+ .arg(header_path)
+ .arg("-o")
+ .arg(&library_path)
+ .arg("-O2");
+ if let Some(scanner_path) = scanner_path.as_ref() {
+ if scanner_path.extension() == Some("c".as_ref()) {
+ command.arg("-xc").arg("-std=c99").arg(scanner_path);
+ } else {
+ command.arg(scanner_path);
}
}
+ command.arg("-xc").arg(parser_path);
}
- (c_files, cpp_files)
-}
-fn build_c(files: Vec<String>, language: &str) {
- let mut build = cc::Build::new();
- for file in files {
- build
- .file(&file)
- .include(PathBuf::from(file).parent().unwrap())
- .pic(true)
- .warnings(false);
+ let output = command
+ .output()
+ .with_context(|| "Failed to execute C compiler")?;
+ if !output.status.success() {
+ return Err(anyhow!(
+ "Parser compilation failed.\nStdout: {}\nStderr: {}",
+ String::from_utf8_lossy(&output.stdout),
+ String::from_utf8_lossy(&output.stderr)
+ ));
}
- build.compile(&format!("tree-sitter-{}-c", language));
+
+ Ok(())
+}
+fn needs_recompile(
+ lib_path: &Path,
+ parser_c_path: &Path,
+ scanner_path: &Option<PathBuf>,
+) -> Result<bool> {
+ if !lib_path.exists() {
+ return Ok(true);
+ }
+ let lib_mtime = mtime(lib_path)?;
+ if mtime(parser_c_path)? > lib_mtime {
+ return Ok(true);
+ }
+ if let Some(scanner_path) = scanner_path {
+ if mtime(scanner_path)? > lib_mtime {
+ return Ok(true);
+ }
+ }
+ Ok(false)
}
-fn build_cpp(files: Vec<String>, language: &str) {
- let mut build = cc::Build::new();
+fn mtime(path: &Path) -> Result<SystemTime> {
+ Ok(fs::metadata(path)?.modified()?)
+}
- let flag = if build.get_compiler().is_like_msvc() {
- "/std:c++17"
- } else {
- "-std=c++14"
- };
+// fn build_c(files: Vec<String>, language: &str) {
+// let mut build = cc::Build::new();
+// for file in files {
+// build
+// .file(&file)
+// .include(PathBuf::from(file).parent().unwrap())
+// .pic(true)
+// .warnings(false);
+// }
+// build.compile(&format!("tree-sitter-{}-c", language));
+// }
- for file in files {
- build
- .file(&file)
- .include(PathBuf::from(file).parent().unwrap())
- .pic(true)
- .warnings(false)
- .cpp(true)
- .flag_if_supported(flag);
- }
- build.compile(&format!("tree-sitter-{}-cpp", language));
-}
+// fn build_cpp(files: Vec<String>, language: &str) {
+// let mut build = cc::Build::new();
+
+// let flag = if build.get_compiler().is_like_msvc() {
+// "/std:c++17"
+// } else {
+// "-std=c++14"
+// };
+
+// for file in files {
+// build
+// .file(&file)
+// .include(PathBuf::from(file).parent().unwrap())
+// .pic(true)
+// .warnings(false)
+// .cpp(true)
+// .flag_if_supported(flag);
+// }
+// build.compile(&format!("tree-sitter-{}-cpp", language));
+// }
fn build_dir(dir: &str, language: &str) {
println!("Build language {}", language);
@@ -92,13 +184,9 @@ fn build_dir(dir: &str, language: &str) {
eprintln!("You can fix in using 'git submodule init && git submodule update --recursive'.");
std::process::exit(1);
}
- let (c, cpp) = collect_src_files(dir);
- if !c.is_empty() {
- build_c(c, language);
- }
- if !cpp.is_empty() {
- build_cpp(cpp, language);
- }
+
+ let path = Path::new("languages").join(dir).join("src");
+ build_library(&path, language).unwrap();
}
fn main() {
@@ -129,6 +217,6 @@ fn main() {
// drop(tx);
assert_eq!(rx.try_iter().sum::<usize>(), n_jobs);
- build_dir("tree-sitter-typescript/tsx", "tsx");
- build_dir("tree-sitter-typescript/typescript", "typescript");
+ // build_dir("tree-sitter-typescript/tsx", "tsx");
+ // build_dir("tree-sitter-typescript/typescript", "typescript");
}
diff --git a/helix-syntax/src/lib.rs b/helix-syntax/src/lib.rs
index 5e3bb3ea..b6c0ecf3 100644
--- a/helix-syntax/src/lib.rs
+++ b/helix-syntax/src/lib.rs
@@ -1,94 +1,39 @@
-use serde::{Deserialize, Serialize};
+use anyhow::{Context, Result};
+use libloading::{Library, Symbol};
use tree_sitter::Language;
-#[macro_export]
-macro_rules! mk_extern {
- ( $( $name:ident ),* ) => {
- $(
- extern "C" { pub fn $name() -> Language; }
- )*
- };
-}
-
-#[macro_export]
-macro_rules! mk_enum {
- ( $( $camel:ident ),* ) => {
- #[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
- #[serde(rename_all = "lowercase")]
- pub enum Lang {
- $(
- $camel,
- )*
+fn replace_dashes_with_underscores(name: &str) -> String {
+ let mut result = String::with_capacity(name.len());
+ for c in name.chars() {
+ if c == '-' {
+ result.push('_');
+ } else {
+ result.push(c);
}
- };
+ }
+ result
}
+#[cfg(unix)]
+const DYLIB_EXTENSION: &str = "so";
-#[macro_export]
-macro_rules! mk_get_language {
- ( $( ($camel:ident, $name:ident) ),* ) => {
- #[must_use]
- pub fn get_language(lang: Lang) -> Language {
- unsafe {
- match lang {
- $(
- Lang::$camel => $name(),
- )*
- }
- }
- }
- };
-}
+#[cfg(windows)]
+const DYLIB_EXTENSION: &str = "dll";
-#[macro_export]
-macro_rules! mk_get_language_name {
- ( $( $camel:ident ),* ) => {
- #[must_use]
- pub const fn get_language_name(lang: Lang) -> &'static str {
- match lang {
- $(
- Lang::$camel => stringify!($camel),
- )*
- }
- }
- };
-}
+pub fn get_language(runtime_path: &std::path::Path, name: &str) -> Result<Language> {
+ let name = name.to_ascii_lowercase();
+ let mut library_path = runtime_path.join("grammars").join(&name);
+ // TODO: duplicated under build
+ library_path.set_extension(DYLIB_EXTENSION);
-#[macro_export]
-macro_rules! mk_langs {
- ( $( ($camel:ident, $name:ident) ),* ) => {
- mk_extern!($( $name ),*);
- mk_enum!($( $camel ),*);
- mk_get_language!($( ($camel, $name) ),*);
- mk_get_language_name!($( $camel ),*);
+ let library = unsafe { Library::new(&library_path) }
+ .with_context(|| format!("Error opening dynamic library {:?}", &library_path))?;
+ let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(&name));
+ let language = unsafe {
+ let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library
+ .get(language_fn_name.as_bytes())
+ .with_context(|| format!("Failed to load symbol {}", language_fn_name))?;
+ language_fn()
};
+ std::mem::forget(library);
+ Ok(language)
}
-
-mk_langs!(
- // 1) Name for enum
- // 2) tree-sitter function to call to get a Language
- (Agda, tree_sitter_agda),
- (Bash, tree_sitter_bash),
- (Cpp, tree_sitter_cpp),
- (CSharp, tree_sitter_c_sharp),
- (Css, tree_sitter_css),
- (C, tree_sitter_c),
- (Elixir, tree_sitter_elixir),
- (Go, tree_sitter_go),
- // (Haskell, tree_sitter_haskell),
- (Html, tree_sitter_html),
- (Javascript, tree_sitter_javascript),
- (Java, tree_sitter_java),
- (Json, tree_sitter_json),
- (Julia, tree_sitter_julia),
- (Latex, tree_sitter_latex),
- (Nix, tree_sitter_nix),
- (Php, tree_sitter_php),
- (Python, tree_sitter_python),
- (Ruby, tree_sitter_ruby),
- (Rust, tree_sitter_rust),
- (Scala, tree_sitter_scala),
- (Swift, tree_sitter_swift),
- (Toml, tree_sitter_toml),
- (Tsx, tree_sitter_tsx),
- (Typescript, tree_sitter_typescript)
-);
diff --git a/runtime/grammars/.gitkeep b/runtime/grammars/.gitkeep
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/runtime/grammars/.gitkeep