From 4fc991fdeca5db36bd7be7197510e62a019e1677 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 16 Feb 2022 07:57:20 -0600 Subject: migrate grammar fetching/building code into helix-loader crate This is a rather large refactor that moves most of the code for loading, fetching, and building grammars into a new helix-loader module. This works well with the [[grammars]] syntax for languages.toml defined earlier: we only have to depend on the types for GrammarConfiguration in helix-loader and can leave all the [[language]] entries for helix-core. --- helix-core/Cargo.toml | 6 +- helix-core/src/config.rs | 27 +-------- helix-core/src/indent.rs | 2 - helix-core/src/lib.rs | 141 ----------------------------------------------- helix-core/src/path.rs | 5 +- helix-core/src/syntax.rs | 91 ++---------------------------- 6 files changed, 14 insertions(+), 258 deletions(-) (limited to 'helix-core') diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml index 5582d38b..8152da57 100644 --- a/helix-core/Cargo.toml +++ b/helix-core/Cargo.toml @@ -13,6 +13,8 @@ include = ["src/**/*", "README.md"] [features] [dependencies] +helix-loader = { version = "0.6", path = "../helix-loader" } + ropey = "1.3" smallvec = "1.8" smartstring = "1.0.0" @@ -33,13 +35,11 @@ toml = "0.5" similar = "2.1" -etcetera = "0.3" encoding_rs = "0.8" chrono = { version = "0.4", default-features = false, features = ["alloc", "std"] } -libloading = "0.7" -anyhow = "1" +etcetera = "0.3" [dev-dependencies] quickcheck = { version = "1", default-features = false } diff --git a/helix-core/src/config.rs b/helix-core/src/config.rs index d4ebee1f..f399850e 100644 --- a/helix-core/src/config.rs +++ b/helix-core/src/config.rs @@ -1,33 +1,10 @@ -use crate::merge_toml_values; - -/// Default bultin-in languages.toml. 
-pub fn default_lang_config() -> toml::Value { - toml::from_slice(include_bytes!("../../languages.toml")) - .expect("Could not parse bultin-in languages.toml to valid toml") -} - -/// User configured languages.toml file, merged with the default config. -pub fn user_lang_config() -> Result<toml::Value, toml::de::Error> { - let def_lang_conf = default_lang_config(); - let data = std::fs::read(crate::config_dir().join("languages.toml")); - let user_lang_conf = match data { - Ok(raw) => { - let value = toml::from_slice(&raw)?; - merge_toml_values(def_lang_conf, value) - } - Err(_) => def_lang_conf, - }; - - Ok(user_lang_conf) -} - /// Syntax configuration loader based on built-in languages.toml. pub fn default_syntax_loader() -> crate::syntax::Configuration { - default_lang_config() + helix_loader::default_lang_config() .try_into() .expect("Could not serialize built-in languages.toml") } /// Syntax configuration loader based on user configured languages.toml. pub fn user_syntax_loader() -> Result<crate::syntax::Configuration, toml::de::Error> { - user_lang_config()?.try_into() + helix_loader::user_lang_config()?.try_into() } diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index ee9cbb16..30f4a340 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -444,8 +444,6 @@ where debugger: None, auto_pairs: None, }], - grammar: vec![], - grammar_selection: None, }); // set runtime path so we can find the queries diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index c3a349c1..1f43c266 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -33,9 +33,6 @@ pub mod unicode { pub use unicode_width as width; } -static RUNTIME_DIR: once_cell::sync::Lazy<std::path::PathBuf> = - once_cell::sync::Lazy::new(runtime_dir); - pub fn find_first_non_whitespace_char(line: RopeSlice) -> Option<usize> { line.chars().position(|ch| !ch.is_whitespace()) } @@ -85,144 +82,6 @@ pub fn find_root(root: Option<&str>, root_markers: &[String]) -> Option std::path::PathBuf { - if let Ok(dir) = std::env::var("HELIX_RUNTIME") { - return dir.into(); - } - - 
const RT_DIR: &str = "runtime"; - let conf_dir = config_dir().join(RT_DIR); - if conf_dir.exists() { - return conf_dir; - } - - if let Ok(dir) = std::env::var("CARGO_MANIFEST_DIR") { - // this is the directory of the crate being run by cargo, we need the workspace path so we take the parent - return std::path::PathBuf::from(dir).parent().unwrap().join(RT_DIR); - } - - // fallback to location of the executable being run - std::env::current_exe() - .ok() - .and_then(|path| path.parent().map(|path| path.to_path_buf().join(RT_DIR))) - .unwrap() -} - -pub fn config_dir() -> std::path::PathBuf { - // TODO: allow env var override - let strategy = choose_base_strategy().expect("Unable to find the config directory!"); - let mut path = strategy.config_dir(); - path.push("helix"); - path -} - -pub fn cache_dir() -> std::path::PathBuf { - // TODO: allow env var override - let strategy = choose_base_strategy().expect("Unable to find the config directory!"); - let mut path = strategy.cache_dir(); - path.push("helix"); - path -} - -pub fn config_file() -> std::path::PathBuf { - config_dir().join("config.toml") -} - -pub fn lang_config_file() -> std::path::PathBuf { - config_dir().join("languages.toml") -} - -pub fn log_file() -> std::path::PathBuf { - cache_dir().join("helix.log") -} - -// right overrides left -pub fn merge_toml_values(left: toml::Value, right: toml::Value) -> toml::Value { - use toml::Value; - - fn get_name(v: &Value) -> Option<&str> { - v.get("name").and_then(Value::as_str) - } - - match (left, right) { - (Value::Array(mut left_items), Value::Array(right_items)) => { - left_items.reserve(right_items.len()); - for rvalue in right_items { - let lvalue = get_name(&rvalue) - .and_then(|rname| left_items.iter().position(|v| get_name(v) == Some(rname))) - .map(|lpos| left_items.remove(lpos)); - let mvalue = match lvalue { - Some(lvalue) => merge_toml_values(lvalue, rvalue), - None => rvalue, - }; - left_items.push(mvalue); - } - Value::Array(left_items) - } - 
(Value::Table(mut left_map), Value::Table(right_map)) => { - for (rname, rvalue) in right_map { - match left_map.remove(&rname) { - Some(lvalue) => { - let merged_value = merge_toml_values(lvalue, rvalue); - left_map.insert(rname, merged_value); - } - None => { - left_map.insert(rname, rvalue); - } - } - } - Value::Table(left_map) - } - // Catch everything else we didn't handle, and use the right value - (_, value) => value, - } -} - -#[cfg(test)] -mod merge_toml_tests { - use super::merge_toml_values; - - #[test] - fn language_tomls() { - use toml::Value; - - const USER: &str = " - [[language]] - name = \"nix\" - test = \"bbb\" - indent = { tab-width = 4, unit = \" \", test = \"aaa\" } - "; - - let base: Value = toml::from_slice(include_bytes!("../../languages.toml")) - .expect("Couldn't parse built-in languages config"); - let user: Value = toml::from_str(USER).unwrap(); - - let merged = merge_toml_values(base, user); - let languages = merged.get("language").unwrap().as_array().unwrap(); - let nix = languages - .iter() - .find(|v| v.get("name").unwrap().as_str().unwrap() == "nix") - .unwrap(); - let nix_indent = nix.get("indent").unwrap(); - - // We changed tab-width and unit in indent so check them if they are the new values - assert_eq!( - nix_indent.get("tab-width").unwrap().as_integer().unwrap(), - 4 - ); - assert_eq!(nix_indent.get("unit").unwrap().as_str().unwrap(), " "); - // We added a new keys, so check them - assert_eq!(nix.get("test").unwrap().as_str().unwrap(), "bbb"); - assert_eq!(nix_indent.get("test").unwrap().as_str().unwrap(), "aaa"); - // We didn't change comment-token so it should be same - assert_eq!(nix.get("comment-token").unwrap().as_str().unwrap(), "#"); - } -} - -pub use etcetera::home_dir; - -use etcetera::base_strategy::{choose_base_strategy, BaseStrategy}; - pub use ropey::{Rope, RopeBuilder, RopeSlice}; // pub use tendril::StrTendril as Tendril; diff --git a/helix-core/src/path.rs b/helix-core/src/path.rs index a6644465..e0c3bef6 
100644 --- a/helix-core/src/path.rs +++ b/helix-core/src/path.rs @@ -1,9 +1,10 @@ +use etcetera::home_dir; use std::path::{Component, Path, PathBuf}; /// Replaces users home directory from `path` with tilde `~` if the directory /// is available, otherwise returns the path unchanged. pub fn fold_home_dir(path: &Path) -> PathBuf { - if let Ok(home) = super::home_dir() { + if let Ok(home) = home_dir() { if path.starts_with(&home) { // it's ok to unwrap, the path starts with home dir return PathBuf::from("~").join(path.strip_prefix(&home).unwrap()); @@ -20,7 +21,7 @@ pub fn expand_tilde(path: &Path) -> PathBuf { let mut components = path.components().peekable(); if let Some(Component::Normal(c)) = components.peek() { if c == &"~" { - if let Ok(home) = super::home_dir() { + if let Ok(home) = home_dir() { // it's ok to unwrap, the path starts with `~` return home.join(path.strip_prefix("~").unwrap()); } diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 28aa31f9..6ae46d4f 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -7,10 +7,6 @@ use crate::{ Rope, RopeSlice, Tendril, }; -use anyhow::{Context, Result}; -use libloading::{Library, Symbol}; -use tree_sitter::Language; - use arc_swap::{ArcSwap, Guard}; use slotmap::{DefaultKey as LayerId, HopSlotMap}; @@ -27,33 +23,7 @@ use std::{ use once_cell::sync::{Lazy, OnceCell}; use serde::{Deserialize, Serialize}; -#[cfg(unix)] -pub const DYLIB_EXTENSION: &str = "so"; - -#[cfg(windows)] -pub const DYLIB_EXTENSION: &str = "dll"; - -fn replace_dashes_with_underscores(name: &str) -> String { - name.replace('-', "_") -} - -pub fn get_language(runtime_path: &std::path::Path, name: &str) -> Result<Language> { - let name = name.to_ascii_lowercase(); - let mut library_path = runtime_path.join("grammars").join(&name); - library_path.set_extension(DYLIB_EXTENSION); - - let library = unsafe { Library::new(&library_path) } - .with_context(|| format!("Error opening dynamic library {:?}", &library_path))?; - 
let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(&name)); - let language = unsafe { - let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library - .get(language_fn_name.as_bytes()) - .with_context(|| format!("Failed to load symbol {}", language_fn_name))?; - language_fn() - }; - std::mem::forget(library); - Ok(language) -} +use helix_loader::grammar::{get_language, load_runtime_file}; fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<Regex>, D::Error> where @@ -81,19 +51,8 @@ where } #[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case", deny_unknown_fields)] pub struct Configuration { - #[serde(rename = "use-grammars")] - pub grammar_selection: Option<GrammarSelection>, pub language: Vec<LanguageConfiguration>, - pub grammar: Vec<GrammarConfiguration>, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "lowercase", untagged)] -pub enum GrammarSelection { - Only(HashSet<String>), - Except(HashSet<String>), } // largely based on tree-sitter/cli/src/loader.rs @@ -279,29 +238,6 @@ pub struct IndentQuery { pub outdent: HashSet<String>, } -#[derive(Debug, Serialize, Deserialize)] -pub struct GrammarConfiguration { - #[serde(rename = "name")] - pub grammar_id: String, // c-sharp, rust - pub source: GrammarSource, - pub path: Option<String>, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -#[serde(untagged)] -pub enum GrammarSource { - Local { - path: String, - }, - Git { - #[serde(rename = "git")] - remote: String, - #[serde(rename = "rev")] - revision: String, - }, -} - #[derive(Debug)] pub struct TextObjectQuery { pub query: Query, @@ -398,14 +334,6 @@ impl TextObjectQuery { } } -pub fn load_runtime_file(language: &str, filename: &str) -> Result<String, std::io::Error> { - let path = crate::RUNTIME_DIR - .join("queries") - .join(language) - .join(filename); - std::fs::read_to_string(&path) -} - fn read_query(language: &str, filename: &str) -> String { static INHERITS_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()]+)\s*").unwrap()); @@ -451,12 +379,9 @@ impl 
LanguageConfiguration { if highlights_query.is_empty() { None } else { - let language = get_language( - &crate::RUNTIME_DIR, - self.grammar.as_deref().unwrap_or(&self.language_id), - ) - .map_err(|e| log::info!("{}", e)) - .ok()?; + let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id)) + .map_err(|e| log::info!("{}", e)) + .ok()?; let config = HighlightConfiguration::new( language, &highlights_query, @@ -2116,13 +2041,9 @@ mod test { .map(String::from) .collect(); - let loader = Loader::new(Configuration { - language: vec![], - grammar: vec![], - grammar_selection: None, - }); + let loader = Loader::new(Configuration { language: vec![] }); - let language = get_language(&crate::RUNTIME_DIR, "Rust").unwrap(); + let language = get_language("Rust").unwrap(); let config = HighlightConfiguration::new( language, &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm") -- cgit v1.2.3-70-g09d2