aboutsummaryrefslogtreecommitdiff
path: root/helix-loader/src/grammar.rs
diff options
context:
space:
mode:
Diffstat (limited to 'helix-loader/src/grammar.rs')
-rw-r--r--helix-loader/src/grammar.rs388
1 files changed, 388 insertions, 0 deletions
diff --git a/helix-loader/src/grammar.rs b/helix-loader/src/grammar.rs
new file mode 100644
index 00000000..61ef464f
--- /dev/null
+++ b/helix-loader/src/grammar.rs
@@ -0,0 +1,388 @@
+use anyhow::{anyhow, Context, Result};
+use libloading::{Library, Symbol};
+use serde::{Deserialize, Serialize};
+use std::fs;
+use std::time::SystemTime;
+use std::{
+ collections::HashSet,
+ path::{Path, PathBuf},
+ process::Command,
+ sync::mpsc::channel,
+};
+use tree_sitter::Language;
+
+#[cfg(unix)]
+const DYLIB_EXTENSION: &str = "so";
+
+#[cfg(windows)]
+const DYLIB_EXTENSION: &str = "dll";
+
+#[derive(Debug, Serialize, Deserialize)]
+struct Configuration {
+ #[serde(rename = "use-grammars")]
+ pub grammar_selection: Option<GrammarSelection>,
+ pub grammar: Vec<GrammarConfiguration>,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase", untagged)]
+pub enum GrammarSelection {
+ Only(HashSet<String>),
+ Except(HashSet<String>),
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct GrammarConfiguration {
+ #[serde(rename = "name")]
+ pub grammar_id: String,
+ pub source: GrammarSource,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase", untagged)]
+pub enum GrammarSource {
+ Local {
+ path: String,
+ },
+ Git {
+ #[serde(rename = "git")]
+ remote: String,
+ #[serde(rename = "rev")]
+ revision: String,
+ subpath: Option<String>,
+ },
+}
+
+const BUILD_TARGET: &str = env!("BUILD_TARGET");
+const REMOTE_NAME: &str = "origin";
+
+pub fn get_language(name: &str) -> Result<Language> {
+ let name = name.to_ascii_lowercase();
+ let mut library_path = crate::runtime_dir().join("grammars").join(&name);
+ library_path.set_extension(DYLIB_EXTENSION);
+
+ let library = unsafe { Library::new(&library_path) }
+ .with_context(|| format!("Error opening dynamic library {library_path:?}"))?;
+ let language_fn_name = format!("tree_sitter_{}", name.replace('-', "_"));
+ let language = unsafe {
+ let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library
+ .get(language_fn_name.as_bytes())
+ .with_context(|| format!("Failed to load symbol {language_fn_name}"))?;
+ language_fn()
+ };
+ std::mem::forget(library);
+ Ok(language)
+}
+
+pub fn fetch_grammars() -> Result<()> {
+ run_parallel(get_grammar_configs()?, fetch_grammar, "fetch")
+}
+
+pub fn build_grammars() -> Result<()> {
+ run_parallel(get_grammar_configs()?, build_grammar, "build")
+}
+
+// Returns the set of grammar configurations the user requests.
+// Grammars are configured in the default and user `languages.toml` and are
+// merged. The `grammar_selection` key of the config is then used to filter
+// down all grammars into a subset of the user's choosing.
+fn get_grammar_configs() -> Result<Vec<GrammarConfiguration>> {
+ let config: Configuration = crate::user_lang_config()
+ .context("Could not parse languages.toml")?
+ .try_into()?;
+
+ let grammars = match config.grammar_selection {
+ Some(GrammarSelection::Only(selections)) => config
+ .grammar
+ .into_iter()
+ .filter(|grammar| selections.contains(&grammar.grammar_id))
+ .collect(),
+ Some(GrammarSelection::Except(rejections)) => config
+ .grammar
+ .into_iter()
+ .filter(|grammar| !rejections.contains(&grammar.grammar_id))
+ .collect(),
+ None => config.grammar,
+ };
+
+ Ok(grammars)
+}
+
+fn run_parallel<F>(grammars: Vec<GrammarConfiguration>, job: F, action: &'static str) -> Result<()>
+where
+ F: Fn(GrammarConfiguration) -> Result<()> + std::marker::Send + 'static + Copy,
+{
+ let pool = threadpool::Builder::new().build();
+ let (tx, rx) = channel();
+
+ for grammar in grammars {
+ let tx = tx.clone();
+
+ pool.execute(move || {
+ tx.send(job(grammar)).unwrap();
+ });
+ }
+ pool.join();
+
+ // TODO: print all failures instead of the first one found.
+ if let Some(failure) = rx.try_iter().find_map(|result| result.err()) {
+ Err(anyhow!(
+ "Failed to {} some grammar(s).\n{}",
+ action,
+ failure
+ ))
+ } else {
+ Ok(())
+ }
+}
+
+fn fetch_grammar(grammar: GrammarConfiguration) -> Result<()> {
+ if let GrammarSource::Git {
+ remote, revision, ..
+ } = grammar.source
+ {
+ let grammar_dir = crate::runtime_dir()
+ .join("grammars/sources")
+ .join(&grammar.grammar_id);
+
+ fs::create_dir_all(&grammar_dir).context(format!(
+ "Could not create grammar directory {:?}",
+ grammar_dir
+ ))?;
+
+ // create the grammar dir contains a git directory
+ if !grammar_dir.join(".git").is_dir() {
+ git(&grammar_dir, ["init"])?;
+ }
+
+ // ensure the remote matches the configured remote
+ if get_remote_url(&grammar_dir).map_or(true, |s| s != remote) {
+ set_remote(&grammar_dir, &remote)?;
+ }
+
+ // ensure the revision matches the configured revision
+ if get_revision(&grammar_dir).map_or(true, |s| s != revision) {
+ // Fetch the exact revision from the remote.
+ // Supported by server-side git since v2.5.0 (July 2015),
+ // enabled by default on major git hosts.
+ git(&grammar_dir, ["fetch", REMOTE_NAME, &revision])?;
+ git(&grammar_dir, ["checkout", &revision])?;
+
+ println!(
+ "Grammar '{}' checked out at '{}'.",
+ grammar.grammar_id, revision
+ );
+ Ok(())
+ } else {
+ println!("Grammar '{}' is already up to date.", grammar.grammar_id);
+ Ok(())
+ }
+ } else {
+ println!("Skipping local grammar '{}'", grammar.grammar_id);
+ Ok(())
+ }
+}
+
+// Sets the remote for a repository to the given URL, creating the remote if
+// it does not yet exist.
+fn set_remote(repository_dir: &Path, remote_url: &str) -> Result<String> {
+ git(
+ repository_dir,
+ ["remote", "set-url", REMOTE_NAME, remote_url],
+ )
+ .or_else(|_| git(repository_dir, ["remote", "add", REMOTE_NAME, remote_url]))
+}
+
+fn get_remote_url(repository_dir: &Path) -> Option<String> {
+ git(repository_dir, ["remote", "get-url", REMOTE_NAME]).ok()
+}
+
+fn get_revision(repository_dir: &Path) -> Option<String> {
+ git(repository_dir, ["rev-parse", "HEAD"]).ok()
+}
+
+// A wrapper around 'git' commands which returns stdout in success and a
+// helpful error message showing the command, stdout, and stderr in error.
+fn git<I, S>(repository_dir: &Path, args: I) -> Result<String>
+where
+ I: IntoIterator<Item = S>,
+ S: AsRef<std::ffi::OsStr>,
+{
+ let output = Command::new("git")
+ .args(args)
+ .current_dir(repository_dir)
+ .output()?;
+
+ if output.status.success() {
+ Ok(String::from_utf8_lossy(&output.stdout)
+ .trim_end()
+ .to_owned())
+ } else {
+ // TODO: figure out how to display the git command using `args`
+ Err(anyhow!(
+ "Git command failed.\nStdout: {}\nStderr: {}",
+ String::from_utf8_lossy(&output.stdout),
+ String::from_utf8_lossy(&output.stderr),
+ ))
+ }
+}
+
+fn build_grammar(grammar: GrammarConfiguration) -> Result<()> {
+ println!("{:#?}", grammar);
+ let grammar_dir = if let GrammarSource::Local { path } = &grammar.source {
+ PathBuf::from(&path)
+ } else {
+ crate::runtime_dir()
+ .join("grammars/sources")
+ .join(&grammar.grammar_id)
+ };
+
+ let grammar_dir_entries = grammar_dir.read_dir().with_context(|| {
+ format!("Failed to read directory {grammar_dir:?}. Did you use 'hx --fetch-grammars'?")
+ })?;
+
+ if grammar_dir_entries.count() == 0 {
+ return Err(anyhow!(
+ "Directory {grammar_dir:?} is empty. Did you use 'hx --fetch-grammars'?"
+ ));
+ };
+
+ let path = match &grammar.source {
+ GrammarSource::Git {
+ subpath: Some(subpath),
+ ..
+ } => grammar_dir.join(subpath),
+ _ => grammar_dir,
+ }
+ .join("src");
+
+ build_tree_sitter_library(&path, grammar)
+}
+
+fn build_tree_sitter_library(src_path: &Path, grammar: GrammarConfiguration) -> Result<()> {
+ let header_path = src_path;
+ let parser_path = src_path.join("parser.c");
+ let mut scanner_path = src_path.join("scanner.c");
+
+ let scanner_path = if scanner_path.exists() {
+ Some(scanner_path)
+ } else {
+ scanner_path.set_extension("cc");
+ if scanner_path.exists() {
+ Some(scanner_path)
+ } else {
+ None
+ }
+ };
+ let parser_lib_path = crate::runtime_dir().join("grammars");
+ let mut library_path = parser_lib_path.join(&grammar.grammar_id);
+ library_path.set_extension(DYLIB_EXTENSION);
+
+ let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
+ .context("Failed to compare source and binary timestamps")?;
+
+ if !recompile {
+ println!("Grammar '{}' is already built.", grammar.grammar_id);
+ return Ok(());
+ }
+
+ println!("Building grammar '{}'", grammar.grammar_id);
+
+ let mut config = cc::Build::new();
+ config
+ .cpp(true)
+ .opt_level(3)
+ .cargo_metadata(false)
+ .host(BUILD_TARGET)
+ .target(BUILD_TARGET);
+ let compiler = config.get_compiler();
+ let mut command = Command::new(compiler.path());
+ command.current_dir(src_path);
+ for (key, value) in compiler.env() {
+ command.env(key, value);
+ }
+
+ if cfg!(windows) {
+ command
+ .args(&["/nologo", "/LD", "/I"])
+ .arg(header_path)
+ .arg("/Od")
+ .arg("/utf-8");
+ if let Some(scanner_path) = scanner_path.as_ref() {
+ command.arg(scanner_path);
+ }
+
+ command
+ .arg(parser_path)
+ .arg("/link")
+ .arg(format!("/out:{}", library_path.to_str().unwrap()));
+ } else {
+ command
+ .arg("-shared")
+ .arg("-fPIC")
+ .arg("-fno-exceptions")
+ .arg("-g")
+ .arg("-I")
+ .arg(header_path)
+ .arg("-o")
+ .arg(&library_path)
+ .arg("-O3");
+ if let Some(scanner_path) = scanner_path.as_ref() {
+ if scanner_path.extension() == Some("c".as_ref()) {
+ command.arg("-xc").arg("-std=c99").arg(scanner_path);
+ } else {
+ command.arg(scanner_path);
+ }
+ }
+ command.arg("-xc").arg(parser_path);
+ if cfg!(all(unix, not(target_os = "macos"))) {
+ command.arg("-Wl,-z,relro,-z,now");
+ }
+ }
+
+ let output = command.output().context("Failed to execute C compiler")?;
+ if !output.status.success() {
+ return Err(anyhow!(
+ "Parser compilation failed.\nStdout: {}\nStderr: {}",
+ String::from_utf8_lossy(&output.stdout),
+ String::from_utf8_lossy(&output.stderr)
+ ));
+ }
+
+ Ok(())
+}
+
+fn needs_recompile(
+ lib_path: &Path,
+ parser_c_path: &Path,
+ scanner_path: &Option<PathBuf>,
+) -> Result<bool> {
+ if !lib_path.exists() {
+ return Ok(true);
+ }
+ let lib_mtime = mtime(lib_path)?;
+ if mtime(parser_c_path)? > lib_mtime {
+ return Ok(true);
+ }
+ if let Some(scanner_path) = scanner_path {
+ if mtime(scanner_path)? > lib_mtime {
+ return Ok(true);
+ }
+ }
+ Ok(false)
+}
+
+fn mtime(path: &Path) -> Result<SystemTime> {
+ Ok(fs::metadata(path)?.modified()?)
+}
+
+/// Gives the contents of a file from a language's `runtime/queries/<lang>`
+/// directory
+pub fn load_runtime_file(language: &str, filename: &str) -> Result<String, std::io::Error> {
+ let path = crate::RUNTIME_DIR
+ .join("queries")
+ .join(language)
+ .join(filename);
+ std::fs::read_to_string(&path)
+}