From dd2903ff10387c04e933aa37846663131297b8b3 Mon Sep 17 00:00:00 2001 From: Blaž Hrastnik Date: Sun, 11 Jul 2021 19:36:45 +0900 Subject: Dynamically load grammar libraries at runtime --- helix-syntax/Cargo.toml | 6 +- helix-syntax/build.rs | 204 ++++++++++++++++++++++++++++++++++-------------- helix-syntax/src/lib.rs | 115 +++++++-------------------- 3 files changed, 180 insertions(+), 145 deletions(-) (limited to 'helix-syntax') diff --git a/helix-syntax/Cargo.toml b/helix-syntax/Cargo.toml index 140e3d24..7ad24488 100644 --- a/helix-syntax/Cargo.toml +++ b/helix-syntax/Cargo.toml @@ -12,8 +12,10 @@ include = ["src/**/*", "languages/**/*", "build.rs", "!**/docs/**/*", "!**/test/ [dependencies] tree-sitter = "0.19" -serde = { version = "1.0", features = ["derive"] } +libloading = "0.7" +anyhow = "1" [build-dependencies] -cc = { version = "1", features = ["parallel"] } +cc = { version = "1" } threadpool = { version = "1.0" } +anyhow = "1" diff --git a/helix-syntax/build.rs b/helix-syntax/build.rs index 847f8a67..02a5bf49 100644 --- a/helix-syntax/build.rs +++ b/helix-syntax/build.rs @@ -1,5 +1,7 @@ +use anyhow::Result; use std::fs; use std::path::PathBuf; +use std::time::SystemTime; use std::sync::mpsc::channel; @@ -15,66 +17,156 @@ fn collect_tree_sitter_dirs(ignore: &[String]) -> Vec { dirs } -fn collect_src_files(dir: &str) -> (Vec, Vec) { - eprintln!("Collect files for {}", dir); +#[cfg(unix)] +const DYLIB_EXTENSION: &str = "so"; - let mut c_files = Vec::new(); - let mut cpp_files = Vec::new(); - let path = PathBuf::from("languages").join(&dir).join("src"); - for entry in fs::read_dir(path).unwrap().flatten() { - let path = entry.path(); - if path - .file_stem() - .unwrap() - .to_str() - .unwrap() - .starts_with("binding") - { - continue; +#[cfg(windows)] +const DYLIB_EXTENSION: &str = "dll"; + +// const BUILD_TARGET: &'static str = env!("BUILD_TARGET"); + +use anyhow::{anyhow, Context}; +use std::{path::Path, process::Command}; + +fn build_library(src_path: 
&Path, language: &str) -> Result<()> { + let header_path = src_path; + // let grammar_path = src_path.join("grammar.json"); + let parser_path = src_path.join("parser.c"); + let mut scanner_path = src_path.join("scanner.c"); + + let scanner_path = if scanner_path.exists() { + Some(scanner_path) + } else { + scanner_path.set_extension("cc"); + if scanner_path.exists() { + Some(scanner_path) + } else { + None + } + }; + let parser_lib_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../runtime/grammars"); + let mut library_path = parser_lib_path.join(language); + library_path.set_extension(DYLIB_EXTENSION); + + let recompile = needs_recompile(&library_path, &parser_path, &scanner_path) + .with_context(|| "Failed to compare source and binary timestamps")?; + + if !recompile { + return Ok(()); + } + + let mut config = cc::Build::new(); + config.cpp(true).opt_level(2).cargo_metadata(false); + // .target(BUILD_TARGET) + // .host(BUILD_TARGET); + let compiler = config.get_compiler(); + let mut command = Command::new(compiler.path()); + for (key, value) in compiler.env() { + command.env(key, value); + } + + if cfg!(windows) { + command + .args(&["/nologo", "/LD", "/I"]) + .arg(header_path) + .arg("/Od") + .arg(parser_path); + if let Some(scanner_path) = scanner_path.as_ref() { + command.arg(scanner_path); } - if let Some(ext) = path.extension() { - if ext == "c" { - c_files.push(path.to_str().unwrap().to_string()); - } else if ext == "cc" || ext == "cpp" || ext == "cxx" { - cpp_files.push(path.to_str().unwrap().to_string()); + command + .arg("/link") + .arg(format!("/out:{}", library_path.to_str().unwrap())); + } else { + command + .arg("-shared") + .arg("-fPIC") + .arg("-fno-exceptions") + .arg("-g") + .arg("-I") + .arg(header_path) + .arg("-o") + .arg(&library_path) + .arg("-O2"); + if let Some(scanner_path) = scanner_path.as_ref() { + if scanner_path.extension() == Some("c".as_ref()) { + command.arg("-xc").arg("-std=c99").arg(scanner_path); + } else { + 
command.arg(scanner_path); } } + command.arg("-xc").arg(parser_path); } - (c_files, cpp_files) -} -fn build_c(files: Vec, language: &str) { - let mut build = cc::Build::new(); - for file in files { - build - .file(&file) - .include(PathBuf::from(file).parent().unwrap()) - .pic(true) - .warnings(false); + let output = command + .output() + .with_context(|| "Failed to execute C compiler")?; + if !output.status.success() { + return Err(anyhow!( + "Parser compilation failed.\nStdout: {}\nStderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + )); } - build.compile(&format!("tree-sitter-{}-c", language)); + + Ok(()) +} +fn needs_recompile( + lib_path: &Path, + parser_c_path: &Path, + scanner_path: &Option, +) -> Result { + if !lib_path.exists() { + return Ok(true); + } + let lib_mtime = mtime(lib_path)?; + if mtime(parser_c_path)? > lib_mtime { + return Ok(true); + } + if let Some(scanner_path) = scanner_path { + if mtime(scanner_path)? > lib_mtime { + return Ok(true); + } + } + Ok(false) } -fn build_cpp(files: Vec, language: &str) { - let mut build = cc::Build::new(); +fn mtime(path: &Path) -> Result { + Ok(fs::metadata(path)?.modified()?) 
+} - let flag = if build.get_compiler().is_like_msvc() { - "/std:c++17" - } else { - "-std=c++14" - }; +// fn build_c(files: Vec, language: &str) { +// let mut build = cc::Build::new(); +// for file in files { +// build +// .file(&file) +// .include(PathBuf::from(file).parent().unwrap()) +// .pic(true) +// .warnings(false); +// } +// build.compile(&format!("tree-sitter-{}-c", language)); +// } - for file in files { - build - .file(&file) - .include(PathBuf::from(file).parent().unwrap()) - .pic(true) - .warnings(false) - .cpp(true) - .flag_if_supported(flag); - } - build.compile(&format!("tree-sitter-{}-cpp", language)); -} +// fn build_cpp(files: Vec, language: &str) { +// let mut build = cc::Build::new(); + +// let flag = if build.get_compiler().is_like_msvc() { +// "/std:c++17" +// } else { +// "-std=c++14" +// }; + +// for file in files { +// build +// .file(&file) +// .include(PathBuf::from(file).parent().unwrap()) +// .pic(true) +// .warnings(false) +// .cpp(true) +// .flag_if_supported(flag); +// } +// build.compile(&format!("tree-sitter-{}-cpp", language)); +// } fn build_dir(dir: &str, language: &str) { println!("Build language {}", language); @@ -92,13 +184,9 @@ fn build_dir(dir: &str, language: &str) { eprintln!("You can fix in using 'git submodule init && git submodule update --recursive'."); std::process::exit(1); } - let (c, cpp) = collect_src_files(dir); - if !c.is_empty() { - build_c(c, language); - } - if !cpp.is_empty() { - build_cpp(cpp, language); - } + + let path = Path::new("languages").join(dir).join("src"); + build_library(&path, language).unwrap(); } fn main() { @@ -129,6 +217,6 @@ fn main() { // drop(tx); assert_eq!(rx.try_iter().sum::(), n_jobs); - build_dir("tree-sitter-typescript/tsx", "tsx"); - build_dir("tree-sitter-typescript/typescript", "typescript"); + // build_dir("tree-sitter-typescript/tsx", "tsx"); + // build_dir("tree-sitter-typescript/typescript", "typescript"); } diff --git a/helix-syntax/src/lib.rs b/helix-syntax/src/lib.rs 
index 5e3bb3ea..b6c0ecf3 100644 --- a/helix-syntax/src/lib.rs +++ b/helix-syntax/src/lib.rs @@ -1,94 +1,39 @@ -use serde::{Deserialize, Serialize}; +use anyhow::{Context, Result}; +use libloading::{Library, Symbol}; use tree_sitter::Language; -#[macro_export] -macro_rules! mk_extern { - ( $( $name:ident ),* ) => { - $( - extern "C" { pub fn $name() -> Language; } - )* - }; -} - -#[macro_export] -macro_rules! mk_enum { - ( $( $camel:ident ),* ) => { - #[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] - #[serde(rename_all = "lowercase")] - pub enum Lang { - $( - $camel, - )* +fn replace_dashes_with_underscores(name: &str) -> String { + let mut result = String::with_capacity(name.len()); + for c in name.chars() { + if c == '-' { + result.push('_'); + } else { + result.push(c); } - }; + } + result } +#[cfg(unix)] +const DYLIB_EXTENSION: &str = "so"; -#[macro_export] -macro_rules! mk_get_language { - ( $( ($camel:ident, $name:ident) ),* ) => { - #[must_use] - pub fn get_language(lang: Lang) -> Language { - unsafe { - match lang { - $( - Lang::$camel => $name(), - )* - } - } - } - }; -} +#[cfg(windows)] +const DYLIB_EXTENSION: &str = "dll"; -#[macro_export] -macro_rules! mk_get_language_name { - ( $( $camel:ident ),* ) => { - #[must_use] - pub const fn get_language_name(lang: Lang) -> &'static str { - match lang { - $( - Lang::$camel => stringify!($camel), - )* - } - } - }; -} +pub fn get_language(runtime_path: &std::path::Path, name: &str) -> Result { + let name = name.to_ascii_lowercase(); + let mut library_path = runtime_path.join("grammars").join(&name); + // TODO: duplicated under build + library_path.set_extension(DYLIB_EXTENSION); -#[macro_export] -macro_rules! 
mk_langs { - ( $( ($camel:ident, $name:ident) ),* ) => { - mk_extern!($( $name ),*); - mk_enum!($( $camel ),*); - mk_get_language!($( ($camel, $name) ),*); - mk_get_language_name!($( $camel ),*); + let library = unsafe { Library::new(&library_path) } + .with_context(|| format!("Error opening dynamic library {:?}", &library_path))?; + let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(&name)); + let language = unsafe { + let language_fn: Symbol Language> = library + .get(language_fn_name.as_bytes()) + .with_context(|| format!("Failed to load symbol {}", language_fn_name))?; + language_fn() }; + std::mem::forget(library); + Ok(language) } - -mk_langs!( - // 1) Name for enum - // 2) tree-sitter function to call to get a Language - (Agda, tree_sitter_agda), - (Bash, tree_sitter_bash), - (Cpp, tree_sitter_cpp), - (CSharp, tree_sitter_c_sharp), - (Css, tree_sitter_css), - (C, tree_sitter_c), - (Elixir, tree_sitter_elixir), - (Go, tree_sitter_go), - // (Haskell, tree_sitter_haskell), - (Html, tree_sitter_html), - (Javascript, tree_sitter_javascript), - (Java, tree_sitter_java), - (Json, tree_sitter_json), - (Julia, tree_sitter_julia), - (Latex, tree_sitter_latex), - (Nix, tree_sitter_nix), - (Php, tree_sitter_php), - (Python, tree_sitter_python), - (Ruby, tree_sitter_ruby), - (Rust, tree_sitter_rust), - (Scala, tree_sitter_scala), - (Swift, tree_sitter_swift), - (Toml, tree_sitter_toml), - (Tsx, tree_sitter_tsx), - (Typescript, tree_sitter_typescript) -); -- cgit v1.2.3-70-g09d2 From c8dc9b64dd0f6726cd1d36a61c563212721ef249 Mon Sep 17 00:00:00 2001 From: Blaž Hrastnik Date: Mon, 12 Jul 2021 00:44:14 +0900 Subject: windows: Try building inside OUT_DIR? 
--- helix-syntax/build.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'helix-syntax') diff --git a/helix-syntax/build.rs b/helix-syntax/build.rs index 02a5bf49..2882f513 100644 --- a/helix-syntax/build.rs +++ b/helix-syntax/build.rs @@ -7,7 +7,8 @@ use std::sync::mpsc::channel; fn collect_tree_sitter_dirs(ignore: &[String]) -> Vec { let mut dirs = Vec::new(); - for entry in fs::read_dir("languages").unwrap().flatten() { + let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("languages"); + for entry in fs::read_dir(path).unwrap().flatten() { let path = entry.path(); let dir = path.file_name().unwrap().to_str().unwrap().to_string(); if !ignore.contains(&dir) { @@ -54,13 +55,14 @@ fn build_library(src_path: &Path, language: &str) -> Result<()> { if !recompile { return Ok(()); } - + let build_dir = std::env::var("OUT_DIR").unwrap(); let mut config = cc::Build::new(); config.cpp(true).opt_level(2).cargo_metadata(false); // .target(BUILD_TARGET) // .host(BUILD_TARGET); let compiler = config.get_compiler(); let mut command = Command::new(compiler.path()); + command.current_dir(build_dir); for (key, value) in compiler.env() { command.env(key, value); } @@ -185,7 +187,10 @@ fn build_dir(dir: &str, language: &str) { std::process::exit(1); } - let path = Path::new("languages").join(dir).join("src"); + let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("languages") + .join(dir) + .join("src"); build_library(&path, language).unwrap(); } -- cgit v1.2.3-70-g09d2 From a7fa5621ce313e4e7d16621cf04b218d1ba1c3fa Mon Sep 17 00:00:00 2001 From: Blaž Hrastnik Date: Mon, 12 Jul 2021 01:01:56 +0900 Subject: Try to rearrange the file order? 
--- helix-syntax/build.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'helix-syntax') diff --git a/helix-syntax/build.rs b/helix-syntax/build.rs index 2882f513..b73658d1 100644 --- a/helix-syntax/build.rs +++ b/helix-syntax/build.rs @@ -71,12 +71,13 @@ fn build_library(src_path: &Path, language: &str) -> Result<()> { command .args(&["/nologo", "/LD", "/I"]) .arg(header_path) - .arg("/Od") - .arg(parser_path); + .arg("/Od"); if let Some(scanner_path) = scanner_path.as_ref() { command.arg(scanner_path); } + command + .arg(parser_path) .arg("/link") .arg(format!("/out:{}", library_path.to_str().unwrap())); } else { -- cgit v1.2.3-70-g09d2 From e6bf6a8f285f4d11eb7be0b745b70d37e1dd6dfa Mon Sep 17 00:00:00 2001 From: Blaž Hrastnik Date: Mon, 12 Jul 2021 17:48:45 +0900 Subject: Build each grammar in its own src dir Windows places temporary files in the current dir, so compiling in parallel caused conflicts. --- helix-syntax/build.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'helix-syntax') diff --git a/helix-syntax/build.rs b/helix-syntax/build.rs index b73658d1..ff82c892 100644 --- a/helix-syntax/build.rs +++ b/helix-syntax/build.rs @@ -55,14 +55,13 @@ fn build_library(src_path: &Path, language: &str) -> Result<()> { if !recompile { return Ok(()); } - let build_dir = std::env::var("OUT_DIR").unwrap(); let mut config = cc::Build::new(); config.cpp(true).opt_level(2).cargo_metadata(false); // .target(BUILD_TARGET) // .host(BUILD_TARGET); let compiler = config.get_compiler(); let mut command = Command::new(compiler.path()); - command.current_dir(build_dir); + command.current_dir(src_path); for (key, value) in compiler.env() { command.env(key, value); } -- cgit v1.2.3-70-g09d2 From a4b077e9b901f19583ad04282ac502f99599f2cc Mon Sep 17 00:00:00 2001 From: Blaž Hrastnik Date: Tue, 13 Jul 2021 23:27:06 +0900 Subject: Build ts/tsx again, refactor collect_tree_sitter_dirs --- helix-syntax/build.rs | 78 
+++++++++++++++++---------------------------------- 1 file changed, 26 insertions(+), 52 deletions(-) (limited to 'helix-syntax') diff --git a/helix-syntax/build.rs b/helix-syntax/build.rs index ff82c892..02c4bc0a 100644 --- a/helix-syntax/build.rs +++ b/helix-syntax/build.rs @@ -1,21 +1,35 @@ -use anyhow::Result; +use anyhow::{anyhow, Context, Result}; use std::fs; -use std::path::PathBuf; use std::time::SystemTime; +use std::{ + path::{Path, PathBuf}, + process::Command, +}; use std::sync::mpsc::channel; -fn collect_tree_sitter_dirs(ignore: &[String]) -> Vec { +fn collect_tree_sitter_dirs(ignore: &[String]) -> Result> { let mut dirs = Vec::new(); let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("languages"); - for entry in fs::read_dir(path).unwrap().flatten() { + + for entry in fs::read_dir(path)? { + let entry = entry?; let path = entry.path(); + + if !entry.file_type()?.is_dir() { + continue; + } + let dir = path.file_name().unwrap().to_str().unwrap().to_string(); - if !ignore.contains(&dir) { - dirs.push(dir); + + // filter ignores + if ignore.contains(&dir) { + continue; } + dirs.push(dir) } - dirs + + Ok(dirs) } #[cfg(unix)] @@ -24,11 +38,6 @@ const DYLIB_EXTENSION: &str = "so"; #[cfg(windows)] const DYLIB_EXTENSION: &str = "dll"; -// const BUILD_TARGET: &'static str = env!("BUILD_TARGET"); - -use anyhow::{anyhow, Context}; -use std::{path::Path, process::Command}; - fn build_library(src_path: &Path, language: &str) -> Result<()> { let header_path = src_path; // let grammar_path = src_path.join("grammar.json"); @@ -57,8 +66,6 @@ fn build_library(src_path: &Path, language: &str) -> Result<()> { } let mut config = cc::Build::new(); config.cpp(true).opt_level(2).cargo_metadata(false); - // .target(BUILD_TARGET) - // .host(BUILD_TARGET); let compiler = config.get_compiler(); let mut command = Command::new(compiler.path()); command.current_dir(src_path); @@ -137,39 +144,6 @@ fn mtime(path: &Path) -> Result { Ok(fs::metadata(path)?.modified()?) 
} -// fn build_c(files: Vec, language: &str) { -// let mut build = cc::Build::new(); -// for file in files { -// build -// .file(&file) -// .include(PathBuf::from(file).parent().unwrap()) -// .pic(true) -// .warnings(false); -// } -// build.compile(&format!("tree-sitter-{}-c", language)); -// } - -// fn build_cpp(files: Vec, language: &str) { -// let mut build = cc::Build::new(); - -// let flag = if build.get_compiler().is_like_msvc() { -// "/std:c++17" -// } else { -// "-std=c++14" -// }; - -// for file in files { -// build -// .file(&file) -// .include(PathBuf::from(file).parent().unwrap()) -// .pic(true) -// .warnings(false) -// .cpp(true) -// .flag_if_supported(flag); -// } -// build.compile(&format!("tree-sitter-{}-cpp", language)); -// } - fn build_dir(dir: &str, language: &str) { println!("Build language {}", language); if PathBuf::from("languages") @@ -191,6 +165,7 @@ fn build_dir(dir: &str, language: &str) { .join("languages") .join(dir) .join("src"); + build_library(&path, language).unwrap(); } @@ -198,9 +173,8 @@ fn main() { let ignore = vec![ "tree-sitter-typescript".to_string(), "tree-sitter-haskell".to_string(), // aarch64 failures: https://github.com/tree-sitter/tree-sitter-haskell/issues/34 - ".DS_Store".to_string(), ]; - let dirs = collect_tree_sitter_dirs(&ignore); + let dirs = collect_tree_sitter_dirs(&ignore).unwrap(); let mut n_jobs = 0; let pool = threadpool::Builder::new().build(); // by going through the builder, it'll use num_cpus @@ -211,7 +185,7 @@ fn main() { n_jobs += 1; pool.execute(move || { - let language = &dir[12..]; // skip tree-sitter- prefix + let language = &dir.strip_prefix("tree-sitter-").unwrap(); build_dir(&dir, language); // report progress @@ -222,6 +196,6 @@ fn main() { // drop(tx); assert_eq!(rx.try_iter().sum::(), n_jobs); - // build_dir("tree-sitter-typescript/tsx", "tsx"); - // build_dir("tree-sitter-typescript/typescript", "typescript"); + build_dir("tree-sitter-typescript/tsx", "tsx"); + 
build_dir("tree-sitter-typescript/typescript", "typescript"); } -- cgit v1.2.3-70-g09d2