From f0c2e898b49b2392d6d1ca02680e88b90e62017e Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Thu, 2 Feb 2023 20:48:16 +0100 Subject: add substring matching options to picker (#5114) --- helix-term/src/ui/fuzzy_match.rs | 237 +++++++++++++++++++++++++++++++++------ 1 file changed, 201 insertions(+), 36 deletions(-) (limited to 'helix-term/src/ui/fuzzy_match.rs') diff --git a/helix-term/src/ui/fuzzy_match.rs b/helix-term/src/ui/fuzzy_match.rs index e25d7328..e6a3f03a 100644 --- a/helix-term/src/ui/fuzzy_match.rs +++ b/helix-term/src/ui/fuzzy_match.rs @@ -4,41 +4,209 @@ use fuzzy_matcher::FuzzyMatcher; #[cfg(test)] mod test; +struct QueryAtom { + kind: QueryAtomKind, + atom: String, + ignore_case: bool, + inverse: bool, +} +impl QueryAtom { + fn new(atom: &str) -> Option { + let mut atom = atom.to_string(); + let inverse = atom.starts_with('!'); + if inverse { + atom.remove(0); + } + + let mut kind = match atom.chars().next() { + Some('^') => QueryAtomKind::Prefix, + Some('\'') => QueryAtomKind::Substring, + _ if inverse => QueryAtomKind::Substring, + _ => QueryAtomKind::Fuzzy, + }; + + if atom.starts_with(&['^', '\'']) { + atom.remove(0); + } + + if atom.is_empty() { + return None; + } + + if atom.ends_with('$') && !atom.ends_with("\\$") { + atom.pop(); + kind = if kind == QueryAtomKind::Prefix { + QueryAtomKind::Exact + } else { + QueryAtomKind::Postfix + } + } + + Some(QueryAtom { + kind, + atom: atom.replace('\\', ""), + // not ideal but fuzzy_matches only knows ascii uppercase so more consistent + // to behave the same + ignore_case: kind != QueryAtomKind::Fuzzy + && atom.chars().all(|c| c.is_ascii_lowercase()), + inverse, + }) + } + + fn indices(&self, matcher: &Matcher, item: &str, indices: &mut Vec) -> bool { + // for inverse there are no indicies to return + // just return whether we matched + if self.inverse { + return self.matches(matcher, item); + } + let buf; + let item = if self.ignore_case { + buf = item.to_ascii_lowercase(); + &buf + } else { + item + }; + let off = match self.kind { + QueryAtomKind::Fuzzy => { + if let Some((_, fuzzy_indices)) = matcher.fuzzy_indices(item, &self.atom) { + indices.extend_from_slice(&fuzzy_indices); + return true; + } else { + return false; + } + } + QueryAtomKind::Substring => { + if let Some(off) = item.find(&self.atom) { + off + } else { + return false; + } + } + QueryAtomKind::Prefix if item.starts_with(&self.atom) => 0, + QueryAtomKind::Postfix if item.ends_with(&self.atom) => item.len() - self.atom.len(), + QueryAtomKind::Exact if item == self.atom => 0, + _ => return false, + }; + + indices.extend(off..(off + self.atom.len())); + true + } + + fn matches(&self, matcher: &Matcher, item: &str) -> bool { + let buf; + let item = if self.ignore_case { + buf = item.to_ascii_lowercase(); + &buf + } else { + item + }; + let mut res = match self.kind { + QueryAtomKind::Fuzzy => matcher.fuzzy_match(item, &self.atom).is_some(), + QueryAtomKind::Substring => item.contains(&self.atom), + QueryAtomKind::Prefix => item.starts_with(&self.atom), + QueryAtomKind::Postfix => item.ends_with(&self.atom), + QueryAtomKind::Exact => item == self.atom, + }; + if self.inverse { + res = !res; + } + res + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +enum QueryAtomKind { + /// Item is a fuzzy match of this behaviour + /// + /// Usage: `foo` + Fuzzy, + /// Item contains query atom as a continous substring + /// + /// Usage `'foo` + Substring, + /// Item starts with query atom + /// + /// Usage: `^foo` + Prefix, + /// Item ends with query atom + /// + /// Usage: `foo$` + Postfix, + /// Item is equal to query atom + /// + /// Usage `^foo$` + Exact, +} + +#[derive(Default)] pub struct FuzzyQuery { - queries: Vec, + first_fuzzy_atom: Option, + query_atoms: Vec, +} + +fn query_atoms(query: &str) -> impl Iterator + '_ { + let mut saw_backslash = false; + query.split(move |c| { + saw_backslash = match c { + ' ' if !saw_backslash => return true, + '\\' => true, + _ => false, + }; + false + }) } impl FuzzyQuery { + pub fn refine(&self, query: &str, old_query: &str) -> (FuzzyQuery, bool) { + // TODO: we could be a lot smarter about this + let new_query = Self::new(query); + let mut is_refinement = query.starts_with(old_query); + + // if the last atom is an inverse atom adding more text to it + // will actually increase the number of matches and we can not refine + // the matches. + if is_refinement && !self.query_atoms.is_empty() { + let last_idx = self.query_atoms.len() - 1; + if self.query_atoms[last_idx].inverse + && self.query_atoms[last_idx].atom != new_query.query_atoms[last_idx].atom + { + is_refinement = false; + } + } + + (new_query, is_refinement) + } + pub fn new(query: &str) -> FuzzyQuery { - let mut saw_backslash = false; - let queries = query - .split(|c| { - saw_backslash = match c { - ' ' if !saw_backslash => return true, - '\\' => true, - _ => false, - }; - false - }) - .filter_map(|query| { - if query.is_empty() { + let mut first_fuzzy_query = None; + let query_atoms = query_atoms(query) + .filter_map(|atom| { + let atom = QueryAtom::new(atom)?; + if atom.kind == QueryAtomKind::Fuzzy && first_fuzzy_query.is_none() { + first_fuzzy_query = Some(atom.atom); None } else { - Some(query.replace("\\ ", " ")) + Some(atom) } }) .collect(); - FuzzyQuery { queries } + FuzzyQuery { + first_fuzzy_atom: first_fuzzy_query, + query_atoms, + } } pub fn fuzzy_match(&self, item: &str, matcher: &Matcher) -> Option { - // use the rank of the first query for the rank, because merging ranks is not really possible + // use the rank of the first fuzzzy query for the rank, because merging ranks is not really possible // this behaviour matches fzf and skim - let score = matcher.fuzzy_match(item, self.queries.get(0)?)?; + let score = self + .first_fuzzy_atom + .as_ref() + .map_or(Some(0), |atom| matcher.fuzzy_match(item, atom))?; if self - .queries + .query_atoms .iter() - .any(|query| matcher.fuzzy_match(item, query).is_none()) + .any(|atom| !atom.matches(matcher, item)) { return None; } @@ -46,29 +214,26 @@ impl FuzzyQuery { } pub fn fuzzy_indicies(&self, item: &str, matcher: &Matcher) -> Option<(i64, Vec)> { - if self.queries.len() == 1 { - return matcher.fuzzy_indices(item, &self.queries[0]); - } - - // use the rank of the first query for the rank, because merging ranks is not really possible - // this behaviour matches fzf and skim - let (score, mut indicies) = matcher.fuzzy_indices(item, self.queries.get(0)?)?; + let (score, mut indices) = self.first_fuzzy_atom.as_ref().map_or_else( + || Some((0, Vec::new())), + |atom| matcher.fuzzy_indices(item, atom), + )?; - // fast path for the common case of not using a space - // during matching this branch should be free thanks to branch prediction - if self.queries.len() == 1 { - return Some((score, indicies)); + // fast path for the common case of just a single atom + if self.query_atoms.is_empty() { + return Some((score, indices)); } - for query in &self.queries[1..] { - let (_, matched_indicies) = matcher.fuzzy_indices(item, query)?; - indicies.extend_from_slice(&matched_indicies); + for atom in &self.query_atoms { + if !atom.indices(matcher, item, &mut indices) { + return None; + } } // deadup and remove duplicate matches - indicies.sort_unstable(); - indicies.dedup(); + indices.sort_unstable(); + indices.dedup(); - Some((score, indicies)) + Some((score, indices)) } } -- cgit v1.2.3-70-g09d2