From dd2903ff10387c04e933aa37846663131297b8b3 Mon Sep 17 00:00:00 2001 From: Blaž Hrastnik Date: Sun, 11 Jul 2021 19:36:45 +0900 Subject: Dynamically load grammar libraries at runtime --- .gitignore | 1 + Cargo.lock | 25 +++--- helix-core/src/indent.rs | 4 +- helix-core/src/syntax.rs | 12 +-- helix-syntax/Cargo.toml | 6 +- helix-syntax/build.rs | 204 +++++++++++++++++++++++++++++++++------------- helix-syntax/src/lib.rs | 115 +++++++------------------- runtime/grammars/.gitkeep | 0 8 files changed, 201 insertions(+), 166 deletions(-) create mode 100644 runtime/grammars/.gitkeep diff --git a/.gitignore b/.gitignore index 1a42b440..346d0946 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ target helix-term/rustfmt.toml helix-syntax/languages/ result +runtime/grammars diff --git a/Cargo.lock b/Cargo.lock index 222751df..e262f081 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -61,9 +61,6 @@ name = "cc" version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2" -dependencies = [ - "jobserver", -] [[package]] name = "cfg-if" @@ -354,8 +351,9 @@ dependencies = [ name = "helix-syntax" version = "0.3.0" dependencies = [ + "anyhow", "cc", - "serde", + "libloading", "threadpool", "tree-sitter", ] @@ -475,15 +473,6 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" -[[package]] -name = "jobserver" -version = "0.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "972f5ae5d1cb9c6ae417789196c803205313edde988685da5e3aae0827b9e7fd" -dependencies = [ - "libc", -] - [[package]] name = "jsonrpc-core" version = "17.1.0" @@ -509,6 +498,16 @@ version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12b8adadd720df158f4d70dfe7ccc6adb0472d7c55ca83445f6a5ab3e36f8fb6" +[[package]] +name = "libloading" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a" +dependencies = [ + "cfg-if 1.0.0", + "winapi", +] + [[package]] name = "lock_api" version = "0.4.4" diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 81bdffc0..1b36db7b 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -253,14 +253,14 @@ where let doc = Rope::from(doc); use crate::syntax::{ - Configuration, IndentationConfiguration, Lang, LanguageConfiguration, Loader, + Configuration, IndentationConfiguration, LanguageConfiguration, Loader, }; use once_cell::sync::OnceCell; let loader = Loader::new(Configuration { language: vec![LanguageConfiguration { scope: "source.rust".to_string(), file_types: vec!["rs".to_string()], - language_id: Lang::Rust, + language_id: "Rust".to_string(), highlight_config: OnceCell::new(), // roots: vec![], diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 833ccfb9..f249f5fe 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -5,7 +5,7 @@ use crate::{ Rope, RopeSlice, Tendril, }; -pub use helix_syntax::{get_language, get_language_name, Lang}; +pub use helix_syntax::get_language; use arc_swap::ArcSwap; @@ -31,7 +31,7 @@ pub struct Configuration { #[serde(rename_all = "kebab-case")] pub struct LanguageConfiguration { #[serde(rename = "name")] - pub(crate) language_id: Lang, + pub(crate) language_id: String, pub scope: String, // source.rust pub file_types: Vec, // filename ends_with? pub roots: Vec, // these indicate project roots <.git, Cargo.toml> @@ -153,7 +153,7 @@ fn read_query(language: &str, filename: &str) -> String { impl LanguageConfiguration { fn initialize_highlight(&self, scopes: &[String]) -> Option> { - let language = get_language_name(self.language_id).to_ascii_lowercase(); + let language = self.language_id.to_ascii_lowercase(); let highlights_query = read_query(&language, "highlights.scm"); // always highlight syntax errors @@ -166,7 +166,7 @@ impl LanguageConfiguration { if highlights_query.is_empty() { None } else { - let language = get_language(self.language_id); + let language = get_language(&crate::RUNTIME_DIR, &self.language_id).ok()?; let config = HighlightConfiguration::new( language, &highlights_query, @@ -198,7 +198,7 @@ impl LanguageConfiguration { pub fn indent_query(&self) -> Option<&IndentQuery> { self.indent_query .get_or_init(|| { - let language = get_language_name(self.language_id).to_ascii_lowercase(); + let language = self.language_id.to_ascii_lowercase(); let toml = load_runtime_file(&language, "indents.toml").ok()?; toml::from_slice(toml.as_bytes()).ok() @@ -1802,7 +1802,7 @@ mod test { .map(String::from) .collect(); - let language = get_language(Lang::Rust); + let language = get_language(&crate::RUNTIME_DIR, "Rust").unwrap(); let config = HighlightConfiguration::new( language, &std::fs::read_to_string( diff --git a/helix-syntax/Cargo.toml b/helix-syntax/Cargo.toml index 140e3d24..7ad24488 100644 --- a/helix-syntax/Cargo.toml +++ b/helix-syntax/Cargo.toml @@ -12,8 +12,10 @@ include = ["src/**/*", "languages/**/*", "build.rs", "!**/docs/**/*", "!**/test/ [dependencies] tree-sitter = "0.19" -serde = { version = "1.0", features = ["derive"] } +libloading = "0.7" +anyhow = "1" [build-dependencies] -cc = { version = "1", features = ["parallel"] } +cc = { version = "1" } threadpool = { version = "1.0" } +anyhow = "1" diff --git a/helix-syntax/build.rs b/helix-syntax/build.rs index 847f8a67..02a5bf49 100644 --- a/helix-syntax/build.rs +++ b/helix-syntax/build.rs @@ -1,5 +1,7 @@ +use anyhow::Result; use std::fs; use std::path::PathBuf; +use std::time::SystemTime; use std::sync::mpsc::channel; @@ -15,66 +17,156 @@ fn collect_tree_sitter_dirs(ignore: &[String]) -> Vec { dirs } -fn collect_src_files(dir: &str) -> (Vec, Vec) { - eprintln!("Collect files for {}", dir); +#[cfg(unix)] +const DYLIB_EXTENSION: &str = "so"; - let mut c_files = Vec::new(); - let mut cpp_files = Vec::new(); - let path = PathBuf::from("languages").join(&dir).join("src"); - for entry in fs::read_dir(path).unwrap().flatten() { - let path = entry.path(); - if path - .file_stem() - .unwrap() - .to_str() - .unwrap() - .starts_with("binding") - { - continue; +#[cfg(windows)] +const DYLIB_EXTENSION: &str = "dll"; + +// const BUILD_TARGET: &'static str = env!("BUILD_TARGET"); + +use anyhow::{anyhow, Context}; +use std::{path::Path, process::Command}; + +fn build_library(src_path: &Path, language: &str) -> Result<()> { + let header_path = src_path; + // let grammar_path = src_path.join("grammar.json"); + let parser_path = src_path.join("parser.c"); + let mut scanner_path = src_path.join("scanner.c"); + + let scanner_path = if scanner_path.exists() { + Some(scanner_path) + } else { + scanner_path.set_extension("cc"); + if scanner_path.exists() { + Some(scanner_path) + } else { + None + } + }; + let parser_lib_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../runtime/grammars"); + let mut library_path = parser_lib_path.join(language); + library_path.set_extension(DYLIB_EXTENSION); + + let recompile = needs_recompile(&library_path, &parser_path, &scanner_path) + .with_context(|| "Failed to compare source and binary timestamps")?; + + if !recompile { + return Ok(()); + } + + let mut config = cc::Build::new(); + config.cpp(true).opt_level(2).cargo_metadata(false); + // .target(BUILD_TARGET) + // .host(BUILD_TARGET); + let compiler = config.get_compiler(); + let mut command = Command::new(compiler.path()); + for (key, value) in compiler.env() { + command.env(key, value); + } + + if cfg!(windows) { + command + .args(&["/nologo", "/LD", "/I"]) + .arg(header_path) + .arg("/Od") + .arg(parser_path); + if let Some(scanner_path) = scanner_path.as_ref() { + command.arg(scanner_path); } - if let Some(ext) = path.extension() { - if ext == "c" { - c_files.push(path.to_str().unwrap().to_string()); - } else if ext == "cc" || ext == "cpp" || ext == "cxx" { - cpp_files.push(path.to_str().unwrap().to_string()); + command + .arg("/link") + .arg(format!("/out:{}", library_path.to_str().unwrap())); + } else { + command + .arg("-shared") + .arg("-fPIC") + .arg("-fno-exceptions") + .arg("-g") + .arg("-I") + .arg(header_path) + .arg("-o") + .arg(&library_path) + .arg("-O2"); + if let Some(scanner_path) = scanner_path.as_ref() { + if scanner_path.extension() == Some("c".as_ref()) { + command.arg("-xc").arg("-std=c99").arg(scanner_path); + } else { + command.arg(scanner_path); } } + command.arg("-xc").arg(parser_path); } - (c_files, cpp_files) -} -fn build_c(files: Vec, language: &str) { - let mut build = cc::Build::new(); - for file in files { - build - .file(&file) - .include(PathBuf::from(file).parent().unwrap()) - .pic(true) - .warnings(false); + let output = command + .output() + .with_context(|| "Failed to execute C compiler")?; + if !output.status.success() { + return Err(anyhow!( + "Parser compilation failed.\nStdout: {}\nStderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + )); } - build.compile(&format!("tree-sitter-{}-c", language)); + + Ok(()) +} +fn needs_recompile( + lib_path: &Path, + parser_c_path: &Path, + scanner_path: &Option, +) -> Result { + if !lib_path.exists() { + return Ok(true); + } + let lib_mtime = mtime(lib_path)?; + if mtime(parser_c_path)? > lib_mtime { + return Ok(true); + } + if let Some(scanner_path) = scanner_path { + if mtime(scanner_path)? > lib_mtime { + return Ok(true); + } + } + Ok(false) } -fn build_cpp(files: Vec, language: &str) { - let mut build = cc::Build::new(); +fn mtime(path: &Path) -> Result { + Ok(fs::metadata(path)?.modified()?) +} - let flag = if build.get_compiler().is_like_msvc() { - "/std:c++17" - } else { - "-std=c++14" - }; +// fn build_c(files: Vec, language: &str) { +// let mut build = cc::Build::new(); +// for file in files { +// build +// .file(&file) +// .include(PathBuf::from(file).parent().unwrap()) +// .pic(true) +// .warnings(false); +// } +// build.compile(&format!("tree-sitter-{}-c", language)); +// } - for file in files { - build - .file(&file) - .include(PathBuf::from(file).parent().unwrap()) - .pic(true) - .warnings(false) - .cpp(true) - .flag_if_supported(flag); - } - build.compile(&format!("tree-sitter-{}-cpp", language)); -} +// fn build_cpp(files: Vec, language: &str) { +// let mut build = cc::Build::new(); + +// let flag = if build.get_compiler().is_like_msvc() { +// "/std:c++17" +// } else { +// "-std=c++14" +// }; + +// for file in files { +// build +// .file(&file) +// .include(PathBuf::from(file).parent().unwrap()) +// .pic(true) +// .warnings(false) +// .cpp(true) +// .flag_if_supported(flag); +// } +// build.compile(&format!("tree-sitter-{}-cpp", language)); +// } fn build_dir(dir: &str, language: &str) { println!("Build language {}", language); @@ -92,13 +184,9 @@ fn build_dir(dir: &str, language: &str) { eprintln!("You can fix in using 'git submodule init && git submodule update --recursive'."); std::process::exit(1); } - let (c, cpp) = collect_src_files(dir); - if !c.is_empty() { - build_c(c, language); - } - if !cpp.is_empty() { - build_cpp(cpp, language); - } + + let path = Path::new("languages").join(dir).join("src"); + build_library(&path, language).unwrap(); } fn main() { @@ -129,6 +217,6 @@ fn main() { // drop(tx); assert_eq!(rx.try_iter().sum::(), n_jobs); - build_dir("tree-sitter-typescript/tsx", "tsx"); - build_dir("tree-sitter-typescript/typescript", "typescript"); + // build_dir("tree-sitter-typescript/tsx", "tsx"); + // build_dir("tree-sitter-typescript/typescript", "typescript"); } diff --git a/helix-syntax/src/lib.rs b/helix-syntax/src/lib.rs index 5e3bb3ea..b6c0ecf3 100644 --- a/helix-syntax/src/lib.rs +++ b/helix-syntax/src/lib.rs @@ -1,94 +1,39 @@ -use serde::{Deserialize, Serialize}; +use anyhow::{Context, Result}; +use libloading::{Library, Symbol}; use tree_sitter::Language; -#[macro_export] -macro_rules! mk_extern { - ( $( $name:ident ),* ) => { - $( - extern "C" { pub fn $name() -> Language; } - )* - }; -} - -#[macro_export] -macro_rules! mk_enum { - ( $( $camel:ident ),* ) => { - #[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] - #[serde(rename_all = "lowercase")] - pub enum Lang { - $( - $camel, - )* +fn replace_dashes_with_underscores(name: &str) -> String { + let mut result = String::with_capacity(name.len()); + for c in name.chars() { + if c == '-' { + result.push('_'); + } else { + result.push(c); } - }; + } + result } +#[cfg(unix)] +const DYLIB_EXTENSION: &str = "so"; -#[macro_export] -macro_rules! mk_get_language { - ( $( ($camel:ident, $name:ident) ),* ) => { - #[must_use] - pub fn get_language(lang: Lang) -> Language { - unsafe { - match lang { - $( - Lang::$camel => $name(), - )* - } - } - } - }; -} +#[cfg(windows)] +const DYLIB_EXTENSION: &str = "dll"; -#[macro_export] -macro_rules! mk_get_language_name { - ( $( $camel:ident ),* ) => { - #[must_use] - pub const fn get_language_name(lang: Lang) -> &'static str { - match lang { - $( - Lang::$camel => stringify!($camel), - )* - } - } - }; -} +pub fn get_language(runtime_path: &std::path::Path, name: &str) -> Result { + let name = name.to_ascii_lowercase(); + let mut library_path = runtime_path.join("grammars").join(&name); + // TODO: duplicated under build + library_path.set_extension(DYLIB_EXTENSION); -#[macro_export] -macro_rules! mk_langs { - ( $( ($camel:ident, $name:ident) ),* ) => { - mk_extern!($( $name ),*); - mk_enum!($( $camel ),*); - mk_get_language!($( ($camel, $name) ),*); - mk_get_language_name!($( $camel ),*); + let library = unsafe { Library::new(&library_path) } + .with_context(|| format!("Error opening dynamic library {:?}", &library_path))?; + let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(&name)); + let language = unsafe { + let language_fn: Symbol Language> = library + .get(language_fn_name.as_bytes()) + .with_context(|| format!("Failed to load symbol {}", language_fn_name))?; + language_fn() }; + std::mem::forget(library); + Ok(language) } - -mk_langs!( - // 1) Name for enum - // 2) tree-sitter function to call to get a Language - (Agda, tree_sitter_agda), - (Bash, tree_sitter_bash), - (Cpp, tree_sitter_cpp), - (CSharp, tree_sitter_c_sharp), - (Css, tree_sitter_css), - (C, tree_sitter_c), - (Elixir, tree_sitter_elixir), - (Go, tree_sitter_go), - // (Haskell, tree_sitter_haskell), - (Html, tree_sitter_html), - (Javascript, tree_sitter_javascript), - (Java, tree_sitter_java), - (Json, tree_sitter_json), - (Julia, tree_sitter_julia), - (Latex, tree_sitter_latex), - (Nix, tree_sitter_nix), - (Php, tree_sitter_php), - (Python, tree_sitter_python), - (Ruby, tree_sitter_ruby), - (Rust, tree_sitter_rust), - (Scala, tree_sitter_scala), - (Swift, tree_sitter_swift), - (Toml, tree_sitter_toml), - (Tsx, tree_sitter_tsx), - (Typescript, tree_sitter_typescript) -); diff --git a/runtime/grammars/.gitkeep b/runtime/grammars/.gitkeep new file mode 100644 index 00000000..e69de29b -- cgit v1.2.3-70-g09d2