diff options
Diffstat (limited to 'helix-core/src/shellwords.rs')
-rw-r--r-- | helix-core/src/shellwords.rs | 331 |
1 files changed, 171 insertions, 160 deletions
diff --git a/helix-core/src/shellwords.rs b/helix-core/src/shellwords.rs index 7742896c..9475f5e5 100644 --- a/helix-core/src/shellwords.rs +++ b/helix-core/src/shellwords.rs @@ -27,181 +27,172 @@ enum State { DquoteEscaped, } -/// Get the vec of escaped / quoted / doublequoted filenames from the input str -pub fn shellwords(input: &str) -> Vec<Cow<'_, str>> { - use State::*; +pub struct Shellwords<'a> { + state: State, + /// Shellwords where whitespace and escapes has been resolved. + words: Vec<Cow<'a, str>>, + /// The parts of the input that are divided into shellwords. This can be + /// used to retrieve the original text for a given word by looking up the + /// same index in the Vec as the word in `words`. + parts: Vec<&'a str>, +} - let mut state = Unquoted; - let mut args: Vec<Cow<str>> = Vec::new(); - let mut escaped = String::with_capacity(input.len()); +impl<'a> From<&'a str> for Shellwords<'a> { + fn from(input: &'a str) -> Self { + use State::*; - let mut start = 0; - let mut end = 0; + let mut state = Unquoted; + let mut words = Vec::new(); + let mut parts = Vec::new(); + let mut escaped = String::with_capacity(input.len()); - for (i, c) in input.char_indices() { - state = match state { - OnWhitespace => match c { - '"' => { - end = i; - Dquoted - } - '\'' => { - end = i; - Quoted - } - '\\' => { - if cfg!(unix) { - escaped.push_str(&input[start..i]); - start = i + 1; - UnquotedEscaped - } else { + let mut part_start = 0; + let mut unescaped_start = 0; + let mut end = 0; + + for (i, c) in input.char_indices() { + state = match state { + OnWhitespace => match c { + '"' => { + end = i; + Dquoted + } + '\'' => { + end = i; + Quoted + } + '\\' => { + if cfg!(unix) { + escaped.push_str(&input[unescaped_start..i]); + unescaped_start = i + 1; + UnquotedEscaped + } else { + OnWhitespace + } + } + c if c.is_ascii_whitespace() => { + end = i; OnWhitespace } - } - c if c.is_ascii_whitespace() => { - end = i; - OnWhitespace - } - _ => Unquoted, - }, - Unquoted => match c { - '\\' => { - if cfg!(unix) { - escaped.push_str(&input[start..i]); - start = i + 1; - UnquotedEscaped - } else { - Unquoted + _ => Unquoted, + }, + Unquoted => match c { + '\\' => { + if cfg!(unix) { + escaped.push_str(&input[unescaped_start..i]); + unescaped_start = i + 1; + UnquotedEscaped + } else { + Unquoted + } } - } - c if c.is_ascii_whitespace() => { - end = i; - OnWhitespace - } - _ => Unquoted, - }, - UnquotedEscaped => Unquoted, - Quoted => match c { - '\\' => { - if cfg!(unix) { - escaped.push_str(&input[start..i]); - start = i + 1; - QuoteEscaped - } else { - Quoted + c if c.is_ascii_whitespace() => { + end = i; + OnWhitespace } - } - '\'' => { - end = i; - OnWhitespace - } - _ => Quoted, - }, - QuoteEscaped => Quoted, - Dquoted => match c { - '\\' => { - if cfg!(unix) { - escaped.push_str(&input[start..i]); - start = i + 1; - DquoteEscaped - } else { - Dquoted + _ => Unquoted, + }, + UnquotedEscaped => Unquoted, + Quoted => match c { + '\\' => { + if cfg!(unix) { + escaped.push_str(&input[unescaped_start..i]); + unescaped_start = i + 1; + QuoteEscaped + } else { + Quoted + } } - } - '"' => { - end = i; - OnWhitespace - } - _ => Dquoted, - }, - DquoteEscaped => Dquoted, - }; + '\'' => { + end = i; + OnWhitespace + } + _ => Quoted, + }, + QuoteEscaped => Quoted, + Dquoted => match c { + '\\' => { + if cfg!(unix) { + escaped.push_str(&input[unescaped_start..i]); + unescaped_start = i + 1; + DquoteEscaped + } else { + Dquoted + } + } + '"' => { + end = i; + OnWhitespace + } + _ => Dquoted, + }, + DquoteEscaped => Dquoted, + }; - if i >= input.len() - 1 && end == 0 { - end = i + 1; - } + if i >= input.len() - 1 && end == 0 { + end = i + 1; + } - if end > 0 { - let esc_trim = escaped.trim(); - let inp = &input[start..end]; + if end > 0 { + let esc_trim = escaped.trim(); + let inp = &input[unescaped_start..end]; - if !(esc_trim.is_empty() && inp.trim().is_empty()) { - if esc_trim.is_empty() { - args.push(inp.into()); - } else { - args.push([escaped, inp.into()].concat().into()); - escaped = "".to_string(); + if !(esc_trim.is_empty() && inp.trim().is_empty()) { + if esc_trim.is_empty() { + words.push(inp.into()); + parts.push(inp); + } else { + words.push([escaped, inp.into()].concat().into()); + parts.push(&input[part_start..end]); + escaped = "".to_string(); + } } + unescaped_start = i + 1; + part_start = i + 1; + end = 0; } - start = i + 1; - end = 0; } - } - args -} -/// Checks that the input ends with an ascii whitespace character which is -/// not escaped. -/// -/// # Examples -/// -/// ```rust -/// use helix_core::shellwords::ends_with_whitespace; -/// assert_eq!(ends_with_whitespace(" "), true); -/// assert_eq!(ends_with_whitespace(":open "), true); -/// assert_eq!(ends_with_whitespace(":open foo.txt "), true); -/// assert_eq!(ends_with_whitespace(":open"), false); -/// #[cfg(unix)] -/// assert_eq!(ends_with_whitespace(":open a\\ "), false); -/// #[cfg(unix)] -/// assert_eq!(ends_with_whitespace(":open a\\ b.txt"), false); -/// ``` -pub fn ends_with_whitespace(input: &str) -> bool { - use State::*; + debug_assert!(words.len() == parts.len()); - // Fast-lane: the input must end with a whitespace character - // regardless of quoting. - if !input.ends_with(|c: char| c.is_ascii_whitespace()) { - return false; + Self { + state, + words, + parts, + } } +} - let mut state = Unquoted; +impl<'a> Shellwords<'a> { + /// Checks that the input ends with a whitespace character which is not escaped. + /// + /// # Examples + /// + /// ```rust + /// use helix_core::shellwords::Shellwords; + /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true); + /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true); + /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true); + /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false); + /// #[cfg(unix)] + /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false); + /// #[cfg(unix)] + /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false); + /// ``` + pub fn ends_with_whitespace(&self) -> bool { + matches!(self.state, State::OnWhitespace) + } - for c in input.chars() { - state = match state { - OnWhitespace => match c { - '"' => Dquoted, - '\'' => Quoted, - '\\' if cfg!(unix) => UnquotedEscaped, - '\\' => OnWhitespace, - c if c.is_ascii_whitespace() => OnWhitespace, - _ => Unquoted, - }, - Unquoted => match c { - '\\' if cfg!(unix) => UnquotedEscaped, - '\\' => Unquoted, - c if c.is_ascii_whitespace() => OnWhitespace, - _ => Unquoted, - }, - UnquotedEscaped => Unquoted, - Quoted => match c { - '\\' if cfg!(unix) => QuoteEscaped, - '\\' => Quoted, - '\'' => OnWhitespace, - _ => Quoted, - }, - QuoteEscaped => Quoted, - Dquoted => match c { - '\\' if cfg!(unix) => DquoteEscaped, - '\\' => Dquoted, - '"' => OnWhitespace, - _ => Dquoted, - }, - DquoteEscaped => Dquoted, - } + /// Returns the list of shellwords calculated from the input string. + pub fn words(&self) -> &[Cow<'a, str>] { + &self.words } - matches!(state, OnWhitespace) + /// Returns a list of strings which correspond to [`Self::words`] but represent the original + /// text in the input string - including escape characters - without separating whitespace. + pub fn parts(&self) -> &[&'a str] { + &self.parts + } } #[cfg(test)] @@ -212,7 +203,8 @@ mod test { #[cfg(windows)] fn test_normal() { let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#; - let result = shellwords(input); + let shellwords = Shellwords::from(input); + let result = shellwords.words().to_vec(); let expected = vec![ Cow::from(":o"), Cow::from("single_word"), @@ -230,7 +222,8 @@ mod test { #[cfg(unix)] fn test_normal() { let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#; - let result = shellwords(input); + let shellwords = Shellwords::from(input); + let result = shellwords.words().to_vec(); let expected = vec![ Cow::from(":o"), Cow::from("single_word"), @@ -247,7 +240,8 @@ mod test { fn test_quoted() { let quoted = r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#; - let result = shellwords(quoted); + let shellwords = Shellwords::from(quoted); + let result = shellwords.words().to_vec(); let expected = vec![ Cow::from(":o"), Cow::from("single_word"), @@ -262,7 +256,8 @@ mod test { #[cfg(unix)] fn test_dquoted() { let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#; - let result = shellwords(dquoted); + let shellwords = Shellwords::from(dquoted); + let result = shellwords.words().to_vec(); let expected = vec![ Cow::from(":o"), Cow::from("single_word"), @@ -277,7 +272,8 @@ mod test { #[cfg(unix)] fn test_mixed() { let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#; - let result = shellwords(dquoted); + let shellwords = Shellwords::from(dquoted); + let result = shellwords.words().to_vec(); let expected = vec![ Cow::from(":o"), Cow::from("single_word"), @@ -298,7 +294,8 @@ mod test { fn test_lists() { let input = r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "qoutes"]'"#; - let result = shellwords(input); + let shellwords = Shellwords::from(input); + let result = shellwords.words().to_vec(); let expected = vec![ Cow::from(":set"), Cow::from("statusline.center"), @@ -322,4 +319,18 @@ mod test { assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar")); assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\"")); } + + #[test] + #[cfg(unix)] + fn test_parts() { + assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]); + assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\ "]); + } + + #[test] + #[cfg(windows)] + fn test_parts() { + assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]); + assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]); + } } |