diff options
Diffstat (limited to 'helix-loader/src')
-rw-r--r-- | helix-loader/src/grammar.rs | 388 | ||||
-rw-r--r-- | helix-loader/src/lib.rs | 161 |
2 files changed, 549 insertions, 0 deletions
diff --git a/helix-loader/src/grammar.rs b/helix-loader/src/grammar.rs new file mode 100644 index 00000000..61ef464f --- /dev/null +++ b/helix-loader/src/grammar.rs @@ -0,0 +1,388 @@ +use anyhow::{anyhow, Context, Result}; +use libloading::{Library, Symbol}; +use serde::{Deserialize, Serialize}; +use std::fs; +use std::time::SystemTime; +use std::{ + collections::HashSet, + path::{Path, PathBuf}, + process::Command, + sync::mpsc::channel, +}; +use tree_sitter::Language; + +#[cfg(unix)] +const DYLIB_EXTENSION: &str = "so"; + +#[cfg(windows)] +const DYLIB_EXTENSION: &str = "dll"; + +#[derive(Debug, Serialize, Deserialize)] +struct Configuration { + #[serde(rename = "use-grammars")] + pub grammar_selection: Option<GrammarSelection>, + pub grammar: Vec<GrammarConfiguration>, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "lowercase", untagged)] +pub enum GrammarSelection { + Only(HashSet<String>), + Except(HashSet<String>), +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct GrammarConfiguration { + #[serde(rename = "name")] + pub grammar_id: String, + pub source: GrammarSource, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "lowercase", untagged)] +pub enum GrammarSource { + Local { + path: String, + }, + Git { + #[serde(rename = "git")] + remote: String, + #[serde(rename = "rev")] + revision: String, + subpath: Option<String>, + }, +} + +const BUILD_TARGET: &str = env!("BUILD_TARGET"); +const REMOTE_NAME: &str = "origin"; + +pub fn get_language(name: &str) -> Result<Language> { + let name = name.to_ascii_lowercase(); + let mut library_path = crate::runtime_dir().join("grammars").join(&name); + library_path.set_extension(DYLIB_EXTENSION); + + let library = unsafe { Library::new(&library_path) } + .with_context(|| format!("Error opening dynamic library {library_path:?}"))?; + let language_fn_name = format!("tree_sitter_{}", name.replace('-', "_")); + let language = unsafe { + let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library + .get(language_fn_name.as_bytes()) + .with_context(|| format!("Failed to load symbol {language_fn_name}"))?; + language_fn() + }; + std::mem::forget(library); + Ok(language) +} + +pub fn fetch_grammars() -> Result<()> { + run_parallel(get_grammar_configs()?, fetch_grammar, "fetch") +} + +pub fn build_grammars() -> Result<()> { + run_parallel(get_grammar_configs()?, build_grammar, "build") +} + +// Returns the set of grammar configurations the user requests. +// Grammars are configured in the default and user `languages.toml` and are +// merged. The `grammar_selection` key of the config is then used to filter +// down all grammars into a subset of the user's choosing. +fn get_grammar_configs() -> Result<Vec<GrammarConfiguration>> { + let config: Configuration = crate::user_lang_config() + .context("Could not parse languages.toml")? + .try_into()?; + + let grammars = match config.grammar_selection { + Some(GrammarSelection::Only(selections)) => config + .grammar + .into_iter() + .filter(|grammar| selections.contains(&grammar.grammar_id)) + .collect(), + Some(GrammarSelection::Except(rejections)) => config + .grammar + .into_iter() + .filter(|grammar| !rejections.contains(&grammar.grammar_id)) + .collect(), + None => config.grammar, + }; + + Ok(grammars) +} + +fn run_parallel<F>(grammars: Vec<GrammarConfiguration>, job: F, action: &'static str) -> Result<()> +where + F: Fn(GrammarConfiguration) -> Result<()> + std::marker::Send + 'static + Copy, +{ + let pool = threadpool::Builder::new().build(); + let (tx, rx) = channel(); + + for grammar in grammars { + let tx = tx.clone(); + + pool.execute(move || { + tx.send(job(grammar)).unwrap(); + }); + } + pool.join(); + + // TODO: print all failures instead of the first one found. + if let Some(failure) = rx.try_iter().find_map(|result| result.err()) { + Err(anyhow!( + "Failed to {} some grammar(s).\n{}", + action, + failure + )) + } else { + Ok(()) + } +} + +fn fetch_grammar(grammar: GrammarConfiguration) -> Result<()> { + if let GrammarSource::Git { + remote, revision, .. + } = grammar.source + { + let grammar_dir = crate::runtime_dir() + .join("grammars/sources") + .join(&grammar.grammar_id); + + fs::create_dir_all(&grammar_dir).context(format!( + "Could not create grammar directory {:?}", + grammar_dir + ))?; + + // create the grammar dir contains a git directory + if !grammar_dir.join(".git").is_dir() { + git(&grammar_dir, ["init"])?; + } + + // ensure the remote matches the configured remote + if get_remote_url(&grammar_dir).map_or(true, |s| s != remote) { + set_remote(&grammar_dir, &remote)?; + } + + // ensure the revision matches the configured revision + if get_revision(&grammar_dir).map_or(true, |s| s != revision) { + // Fetch the exact revision from the remote. + // Supported by server-side git since v2.5.0 (July 2015), + // enabled by default on major git hosts. + git(&grammar_dir, ["fetch", REMOTE_NAME, &revision])?; + git(&grammar_dir, ["checkout", &revision])?; + + println!( + "Grammar '{}' checked out at '{}'.", + grammar.grammar_id, revision + ); + Ok(()) + } else { + println!("Grammar '{}' is already up to date.", grammar.grammar_id); + Ok(()) + } + } else { + println!("Skipping local grammar '{}'", grammar.grammar_id); + Ok(()) + } +} + +// Sets the remote for a repository to the given URL, creating the remote if +// it does not yet exist. +fn set_remote(repository_dir: &Path, remote_url: &str) -> Result<String> { + git( + repository_dir, + ["remote", "set-url", REMOTE_NAME, remote_url], + ) + .or_else(|_| git(repository_dir, ["remote", "add", REMOTE_NAME, remote_url])) +} + +fn get_remote_url(repository_dir: &Path) -> Option<String> { + git(repository_dir, ["remote", "get-url", REMOTE_NAME]).ok() +} + +fn get_revision(repository_dir: &Path) -> Option<String> { + git(repository_dir, ["rev-parse", "HEAD"]).ok() +} + +// A wrapper around 'git' commands which returns stdout in success and a +// helpful error message showing the command, stdout, and stderr in error. +fn git<I, S>(repository_dir: &Path, args: I) -> Result<String> +where + I: IntoIterator<Item = S>, + S: AsRef<std::ffi::OsStr>, +{ + let output = Command::new("git") + .args(args) + .current_dir(repository_dir) + .output()?; + + if output.status.success() { + Ok(String::from_utf8_lossy(&output.stdout) + .trim_end() + .to_owned()) + } else { + // TODO: figure out how to display the git command using `args` + Err(anyhow!( + "Git command failed.\nStdout: {}\nStderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr), + )) + } +} + +fn build_grammar(grammar: GrammarConfiguration) -> Result<()> { + println!("{:#?}", grammar); + let grammar_dir = if let GrammarSource::Local { path } = &grammar.source { + PathBuf::from(&path) + } else { + crate::runtime_dir() + .join("grammars/sources") + .join(&grammar.grammar_id) + }; + + let grammar_dir_entries = grammar_dir.read_dir().with_context(|| { + format!("Failed to read directory {grammar_dir:?}. Did you use 'hx --fetch-grammars'?") + })?; + + if grammar_dir_entries.count() == 0 { + return Err(anyhow!( + "Directory {grammar_dir:?} is empty. Did you use 'hx --fetch-grammars'?" + )); + }; + + let path = match &grammar.source { + GrammarSource::Git { + subpath: Some(subpath), + .. + } => grammar_dir.join(subpath), + _ => grammar_dir, + } + .join("src"); + + build_tree_sitter_library(&path, grammar) +} + +fn build_tree_sitter_library(src_path: &Path, grammar: GrammarConfiguration) -> Result<()> { + let header_path = src_path; + let parser_path = src_path.join("parser.c"); + let mut scanner_path = src_path.join("scanner.c"); + + let scanner_path = if scanner_path.exists() { + Some(scanner_path) + } else { + scanner_path.set_extension("cc"); + if scanner_path.exists() { + Some(scanner_path) + } else { + None + } + }; + let parser_lib_path = crate::runtime_dir().join("grammars"); + let mut library_path = parser_lib_path.join(&grammar.grammar_id); + library_path.set_extension(DYLIB_EXTENSION); + + let recompile = needs_recompile(&library_path, &parser_path, &scanner_path) + .context("Failed to compare source and binary timestamps")?; + + if !recompile { + println!("Grammar '{}' is already built.", grammar.grammar_id); + return Ok(()); + } + + println!("Building grammar '{}'", grammar.grammar_id); + + let mut config = cc::Build::new(); + config + .cpp(true) + .opt_level(3) + .cargo_metadata(false) + .host(BUILD_TARGET) + .target(BUILD_TARGET); + let compiler = config.get_compiler(); + let mut command = Command::new(compiler.path()); + command.current_dir(src_path); + for (key, value) in compiler.env() { + command.env(key, value); + } + + if cfg!(windows) { + command + .args(&["/nologo", "/LD", "/I"]) + .arg(header_path) + .arg("/Od") + .arg("/utf-8"); + if let Some(scanner_path) = scanner_path.as_ref() { + command.arg(scanner_path); + } + + command + .arg(parser_path) + .arg("/link") + .arg(format!("/out:{}", library_path.to_str().unwrap())); + } else { + command + .arg("-shared") + .arg("-fPIC") + .arg("-fno-exceptions") + .arg("-g") + .arg("-I") + .arg(header_path) + .arg("-o") + .arg(&library_path) + .arg("-O3"); + if let Some(scanner_path) = scanner_path.as_ref() { + if scanner_path.extension() == Some("c".as_ref()) { + command.arg("-xc").arg("-std=c99").arg(scanner_path); + } else { + command.arg(scanner_path); + } + } + command.arg("-xc").arg(parser_path); + if cfg!(all(unix, not(target_os = "macos"))) { + command.arg("-Wl,-z,relro,-z,now"); + } + } + + let output = command.output().context("Failed to execute C compiler")?; + if !output.status.success() { + return Err(anyhow!( + "Parser compilation failed.\nStdout: {}\nStderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + )); + } + + Ok(()) +} + +fn needs_recompile( + lib_path: &Path, + parser_c_path: &Path, + scanner_path: &Option<PathBuf>, +) -> Result<bool> { + if !lib_path.exists() { + return Ok(true); + } + let lib_mtime = mtime(lib_path)?; + if mtime(parser_c_path)? > lib_mtime { + return Ok(true); + } + if let Some(scanner_path) = scanner_path { + if mtime(scanner_path)? > lib_mtime { + return Ok(true); + } + } + Ok(false) +} + +fn mtime(path: &Path) -> Result<SystemTime> { + Ok(fs::metadata(path)?.modified()?) +} + +/// Gives the contents of a file from a language's `runtime/queries/<lang>` +/// directory +pub fn load_runtime_file(language: &str, filename: &str) -> Result<String, std::io::Error> { + let path = crate::RUNTIME_DIR + .join("queries") + .join(language) + .join(filename); + std::fs::read_to_string(&path) +} diff --git a/helix-loader/src/lib.rs b/helix-loader/src/lib.rs new file mode 100644 index 00000000..a2c4d96f --- /dev/null +++ b/helix-loader/src/lib.rs @@ -0,0 +1,161 @@ +pub mod grammar; + +use etcetera::base_strategy::{choose_base_strategy, BaseStrategy}; + +pub static RUNTIME_DIR: once_cell::sync::Lazy<std::path::PathBuf> = + once_cell::sync::Lazy::new(runtime_dir); + +pub fn runtime_dir() -> std::path::PathBuf { + if let Ok(dir) = std::env::var("HELIX_RUNTIME") { + return dir.into(); + } + + const RT_DIR: &str = "runtime"; + let conf_dir = config_dir().join(RT_DIR); + if conf_dir.exists() { + return conf_dir; + } + + if let Ok(dir) = std::env::var("CARGO_MANIFEST_DIR") { + // this is the directory of the crate being run by cargo, we need the workspace path so we take the parent + return std::path::PathBuf::from(dir).parent().unwrap().join(RT_DIR); + } + + // fallback to location of the executable being run + std::env::current_exe() + .ok() + .and_then(|path| path.parent().map(|path| path.to_path_buf().join(RT_DIR))) + .unwrap() +} + +pub fn config_dir() -> std::path::PathBuf { + // TODO: allow env var override + let strategy = choose_base_strategy().expect("Unable to find the config directory!"); + let mut path = strategy.config_dir(); + path.push("helix"); + path +} + +pub fn cache_dir() -> std::path::PathBuf { + // TODO: allow env var override + let strategy = choose_base_strategy().expect("Unable to find the config directory!"); + let mut path = strategy.cache_dir(); + path.push("helix"); + path +} + +pub fn config_file() -> std::path::PathBuf { + config_dir().join("config.toml") +} + +pub fn lang_config_file() -> std::path::PathBuf { + config_dir().join("languages.toml") +} + +pub fn log_file() -> std::path::PathBuf { + cache_dir().join("helix.log") +} + +/// Default bultin-in languages.toml. +pub fn default_lang_config() -> toml::Value { + toml::from_slice(include_bytes!("../../languages.toml")) + .expect("Could not parse bultin-in languages.toml to valid toml") +} + +/// User configured languages.toml file, merged with the default config. +pub fn user_lang_config() -> Result<toml::Value, toml::de::Error> { + let def_lang_conf = default_lang_config(); + let data = std::fs::read(crate::config_dir().join("languages.toml")); + let user_lang_conf = match data { + Ok(raw) => { + let value = toml::from_slice(&raw)?; + merge_toml_values(def_lang_conf, value) + } + Err(_) => def_lang_conf, + }; + + Ok(user_lang_conf) +} + +// right overrides left +pub fn merge_toml_values(left: toml::Value, right: toml::Value) -> toml::Value { + use toml::Value; + + fn get_name(v: &Value) -> Option<&str> { + v.get("name").and_then(Value::as_str) + } + + match (left, right) { + (Value::Array(mut left_items), Value::Array(right_items)) => { + left_items.reserve(right_items.len()); + for rvalue in right_items { + let lvalue = get_name(&rvalue) + .and_then(|rname| left_items.iter().position(|v| get_name(v) == Some(rname))) + .map(|lpos| left_items.remove(lpos)); + let mvalue = match lvalue { + Some(lvalue) => merge_toml_values(lvalue, rvalue), + None => rvalue, + }; + left_items.push(mvalue); + } + Value::Array(left_items) + } + (Value::Table(mut left_map), Value::Table(right_map)) => { + for (rname, rvalue) in right_map { + match left_map.remove(&rname) { + Some(lvalue) => { + let merged_value = merge_toml_values(lvalue, rvalue); + left_map.insert(rname, merged_value); + } + None => { + left_map.insert(rname, rvalue); + } + } + } + Value::Table(left_map) + } + // Catch everything else we didn't handle, and use the right value + (_, value) => value, + } +} + +#[cfg(test)] +mod merge_toml_tests { + use super::merge_toml_values; + + #[test] + fn language_tomls() { + use toml::Value; + + const USER: &str = " + [[language]] + name = \"nix\" + test = \"bbb\" + indent = { tab-width = 4, unit = \" \", test = \"aaa\" } + "; + + let base: Value = toml::from_slice(include_bytes!("../../languages.toml")) + .expect("Couldn't parse built-in languages config"); + let user: Value = toml::from_str(USER).unwrap(); + + let merged = merge_toml_values(base, user); + let languages = merged.get("language").unwrap().as_array().unwrap(); + let nix = languages + .iter() + .find(|v| v.get("name").unwrap().as_str().unwrap() == "nix") + .unwrap(); + let nix_indent = nix.get("indent").unwrap(); + + // We changed tab-width and unit in indent so check them if they are the new values + assert_eq!( + nix_indent.get("tab-width").unwrap().as_integer().unwrap(), + 4 + ); + assert_eq!(nix_indent.get("unit").unwrap().as_str().unwrap(), " "); + // We added a new keys, so check them + assert_eq!(nix.get("test").unwrap().as_str().unwrap(), "bbb"); + assert_eq!(nix_indent.get("test").unwrap().as_str().unwrap(), "aaa"); + // We didn't change comment-token so it should be same + assert_eq!(nix.get("comment-token").unwrap().as_str().unwrap(), "#"); + } +} |