aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pascal Kuthe 2023-03-13 18:27:54 +0000
committer Blaž Hrastnik 2023-03-16 06:46:08 +0000
commit 90348b889f397f2753473764c2d7a02986beddeb (patch)
tree b4da76ddd1a9daf0ff8ca0d572f6a9926cc99d06
parent bbf480007d87631ef7b7f93cef170ec1af961406 (diff)
revamped snippet text element parsing
Snippet text elements can contain escape sequences that must be treated properly. Furthermore, snippets must always escape certain characters (like `}` or `\`). The function has been updated to account for that. `text` is now also included with `anything` to match the grammar and can also match empty text. To avoid infinite loops, the `non_empty` combinator has been added, which is automatically used in the `one_or_more` and `zero_or_more` combinators where the problem would occur.
-rw-r--r-- helix-lsp/src/snippet.rs 117
-rw-r--r-- helix-parsec/src/lib.rs 13
2 files changed, 85 insertions, 45 deletions
diff --git a/helix-lsp/src/snippet.rs b/helix-lsp/src/snippet.rs
index 77f44d4e..f64f29f2 100644
--- a/helix-lsp/src/snippet.rs
+++ b/helix-lsp/src/snippet.rs
@@ -12,7 +12,7 @@ pub enum CaseChange {
#[derive(Debug, PartialEq, Eq)]
pub enum FormatItem<'a> {
- Text(&'a str),
+ Text(Tendril),
Capture(usize),
CaseChange(usize, CaseChange),
Conditional(usize, Option<&'a str>, Option<&'a str>),
@@ -20,9 +20,9 @@ pub enum FormatItem<'a> {
#[derive(Debug, PartialEq, Eq)]
pub struct Regex<'a> {
- value: &'a str,
+ value: Tendril,
replacement: Vec<FormatItem<'a>>,
- options: Option<&'a str>,
+ options: Tendril,
}
#[derive(Debug, PartialEq, Eq)]
@@ -36,14 +36,14 @@ pub enum SnippetElement<'a> {
},
Choice {
tabstop: usize,
- choices: Vec<&'a str>,
+ choices: Vec<Tendril>,
},
Variable {
name: &'a str,
default: Option<&'a str>,
regex: Option<Regex<'a>>,
},
- Text(&'a str),
+ Text(Tendril),
}
#[derive(Debug, PartialEq, Eq)]
@@ -67,12 +67,12 @@ fn render_elements(
for element in snippet_elements {
match element {
- &Text(text) => {
+ Text(text) => {
// small optimization to avoid calling replace when it's unnecessary
let text = if text.contains('\n') {
Cow::Owned(text.replace('\n', newline_with_offset))
} else {
- Cow::Borrowed(text)
+ Cow::Borrowed(text.as_str())
};
*offset += text.chars().count();
insert.push_str(&text);
@@ -160,6 +160,7 @@ pub fn render(
}
mod parser {
+ use helix_core::Tendril;
use helix_parsec::*;
use super::{CaseChange, FormatItem, Regex, Snippet, SnippetElement};
@@ -210,8 +211,32 @@ mod parser {
}
}
- fn text<'a, const SIZE: usize>(cs: [char; SIZE]) -> impl Parser<'a, Output = &'a str> {
- take_while(move |c| cs.into_iter().all(|c1| c != c1))
+ const TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$'];
+ const REPLACE_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '/'];
+ const CHOICE_TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '|', ','];
+
+ fn text<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = Tendril> {
+ move |input: &'a str| {
+ let mut chars = input.char_indices();
+ let mut res = Tendril::new();
+ while let Some((i, c)) = chars.next() {
+ match c {
+ '\\' => {
+ if let Some((_, c)) = chars.next() {
+ if escape_chars.contains(&c) {
+ res.push(c);
+ continue;
+ }
+ }
+ return Ok((&input[i..], res));
+ }
+ c if escape_chars.contains(&c) => return Ok((&input[i..], res)),
+ c => res.push(c),
+ }
+ }
+
+ Ok(("", res))
+ }
}
fn digit<'a>() -> impl Parser<'a, Output = usize> {
@@ -274,20 +299,18 @@ mod parser {
}
fn regex<'a>() -> impl Parser<'a, Output = Regex<'a>> {
- let text = map(text(['$', '/']), FormatItem::Text);
- let replacement = reparse_as(
- take_until(|c| c == '/'),
- one_or_more(choice!(format(), text)),
- );
-
map(
seq!(
"/",
- take_until(|c| c == '/'),
+ // TODO parse as ECMAScript and convert to rust regex
+ non_empty(text(&['/', '\\'])),
"/",
- replacement,
+ one_or_more(choice!(
+ format(),
+ map(text(REPLACE_ESCAPE_CHARS), FormatItem::Text)
+ )),
"/",
- optional(take_until(|c| c == '}')),
+ text(&['}', '\\',]),
),
|(_, value, _, replacement, _, options)| Regex {
value,
@@ -308,13 +331,12 @@ mod parser {
}
fn placeholder<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
- let text = map(text(['$', '}']), SnippetElement::Text);
map(
seq!(
"${",
digit(),
":",
- one_or_more(choice!(anything(), text)),
+ one_or_more(anything(TEXT_ESCAPE_CHARS)),
"}"
),
|seq| SnippetElement::Placeholder {
@@ -330,7 +352,7 @@ mod parser {
"${",
digit(),
"|",
- sep(take_until(|c| c == ',' || c == '|'), ","),
+ sep(text(CHOICE_TEXT_ESCAPE_CHARS), ","),
"|}",
),
|seq| SnippetElement::Choice {
@@ -368,17 +390,21 @@ mod parser {
)
}
- fn anything<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
- // The parser has to be constructed lazily to avoid infinite opaque type recursion
- |input: &'a str| {
- let parser = choice!(tabstop(), placeholder(), choice(), variable());
+ fn anything<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = SnippetElement<'a>> {
+ move |input: &'a str| {
+ let parser = choice!(
+ tabstop(),
+ placeholder(),
+ choice(),
+ variable(),
+ map(text(escape_chars), SnippetElement::Text)
+ );
parser.parse(input)
}
}
fn snippet<'a>() -> impl Parser<'a, Output = Snippet<'a>> {
- let text = map(text(['$']), SnippetElement::Text);
- map(one_or_more(choice!(anything(), text)), |parts| Snippet {
+ map(one_or_more(anything(TEXT_ESCAPE_CHARS)), |parts| Snippet {
elements: parts,
})
}
@@ -392,6 +418,7 @@ mod parser {
}
})
}
+
#[cfg(test)]
mod test {
use super::SnippetElement::*;
@@ -407,12 +434,12 @@ mod parser {
assert_eq!(
Ok(Snippet {
elements: vec![
- Text("match("),
+ Text("match(".into()),
Placeholder {
tabstop: 1,
- value: vec!(Text("Arg1")),
+ value: vec!(Text("Arg1".into())),
},
- Text(")")
+ Text(")".into())
]
}),
parse("match(${1:Arg1})")
@@ -446,15 +473,15 @@ mod parser {
assert_eq!(
Ok(Snippet {
elements: vec![
- Text("local "),
+ Text("local ".into()),
Placeholder {
tabstop: 1,
- value: vec!(Text("var")),
+ value: vec!(Text("var".into())),
},
- Text(" = "),
+ Text(" = ".into()),
Placeholder {
tabstop: 1,
- value: vec!(Text("value")),
+ value: vec!(Text("value".into())),
},
]
}),
@@ -468,7 +495,7 @@ mod parser {
Ok(Snippet {
elements: vec![Placeholder {
tabstop: 1,
- value: vec!(Text("var, "), Tabstop { tabstop: 2 },),
+ value: vec!(Text("var, ".into()), Tabstop { tabstop: 2 },),
},]
}),
parse("${1:var, $2}")
@@ -482,10 +509,10 @@ mod parser {
elements: vec![Placeholder {
tabstop: 1,
value: vec!(
- Text("foo "),
+ Text("foo ".into()),
Placeholder {
tabstop: 2,
- value: vec!(Text("bar")),
+ value: vec!(Text("bar".into())),
},
),
},]
@@ -499,27 +526,27 @@ mod parser {
assert_eq!(
Ok(Snippet {
elements: vec![
- Text("hello "),
+ Text("hello ".into()),
Tabstop { tabstop: 1 },
Tabstop { tabstop: 2 },
- Text(" "),
+ Text(" ".into()),
Choice {
tabstop: 1,
- choices: vec!["one", "two", "three"]
+ choices: vec!["one".into(), "two".into(), "three".into()]
},
- Text(" "),
+ Text(" ".into()),
Variable {
name: "name",
default: Some("foo"),
regex: None
},
- Text(" "),
+ Text(" ".into()),
Variable {
name: "var",
default: None,
regex: None
},
- Text(" "),
+ Text(" ".into()),
Variable {
name: "TM",
default: None,
@@ -539,9 +566,9 @@ mod parser {
name: "TM_FILENAME",
default: None,
regex: Some(Regex {
- value: "(.*).+$",
+ value: "(.*).+$".into(),
replacement: vec![FormatItem::Capture(1)],
- options: None,
+ options: Tendril::new(),
}),
}]
}),
diff --git a/helix-parsec/src/lib.rs b/helix-parsec/src/lib.rs
index e09814b8..846d02d6 100644
--- a/helix-parsec/src/lib.rs
+++ b/helix-parsec/src/lib.rs
@@ -459,6 +459,7 @@ pub fn zero_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
where
P: Parser<'a, Output = T>,
{
+ let parser = non_empty(parser);
move |mut input| {
let mut values = Vec::new();
@@ -491,6 +492,7 @@ pub fn one_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
where
P: Parser<'a, Output = T>,
{
+ let parser = non_empty(parser);
move |mut input| {
let mut values = Vec::new();
@@ -559,3 +561,14 @@ where
Ok((input, values))
}
}
+
+pub fn non_empty<'a, T>(p: impl Parser<'a, Output = T>) -> impl Parser<'a, Output = T> {
+ move |input| {
+ let (new_input, res) = p.parse(input)?;
+ if new_input.len() == input.len() {
+ Err(input)
+ } else {
+ Ok((new_input, res))
+ }
+ }
+}