use std::borrow::Cow; /// Auto escape for shellwords usage. pub fn escape(input: Cow<str>) -> Cow<str> { if !input.chars().any(|x| x.is_ascii_whitespace()) { input } else if cfg!(unix) { Cow::Owned(input.chars().fold(String::new(), |mut buf, c| { if c.is_ascii_whitespace() { buf.push('\\'); } buf.push(c); buf })) } else { Cow::Owned(format!("\"{}\"", input)) } } enum State { OnWhitespace, Unquoted, UnquotedEscaped, Quoted, QuoteEscaped, Dquoted, DquoteEscaped, } pub struct Shellwords<'a> { state: State, /// Shellwords where whitespace and escapes has been resolved. words: Vec<Cow<'a, str>>, /// The parts of the input that are divided into shellwords. This can be /// used to retrieve the original text for a given word by looking up the /// same index in the Vec as the word in `words`. parts: Vec<&'a str>, } impl<'a> From<&'a str> for Shellwords<'a> { fn from(input: &'a str) -> Self { use State::*; let mut state = Unquoted; let mut words = Vec::new(); let mut parts = Vec::new(); let mut escaped = String::with_capacity(input.len()); let mut part_start = 0; let mut unescaped_start = 0; let mut end = 0; for (i, c) in input.char_indices() { state = match state { OnWhitespace => match c { '"' => { end = i; Dquoted } '\'' => { end = i; Quoted } '\\' => { if cfg!(unix) { escaped.push_str(&input[unescaped_start..i]); unescaped_start = i + 1; UnquotedEscaped } else { OnWhitespace } } c if c.is_ascii_whitespace() => { end = i; OnWhitespace } _ => Unquoted, }, Unquoted => match c { '\\' => { if cfg!(unix) { escaped.push_str(&input[unescaped_start..i]); unescaped_start = i + 1; UnquotedEscaped } else { Unquoted } } c if c.is_ascii_whitespace() => { end = i; OnWhitespace } _ => Unquoted, }, UnquotedEscaped => Unquoted, Quoted => match c { '\\' => { if cfg!(unix) { escaped.push_str(&input[unescaped_start..i]); unescaped_start = i + 1; QuoteEscaped } else { Quoted } } '\'' => { end = i; OnWhitespace } _ => Quoted, }, QuoteEscaped => Quoted, Dquoted => match c { '\\' => { if cfg!(unix) { escaped.push_str(&input[unescaped_start..i]); unescaped_start = i + 1; DquoteEscaped } else { Dquoted } } '"' => { end = i; OnWhitespace } _ => Dquoted, }, DquoteEscaped => Dquoted, }; let c_len = c.len_utf8(); if i == input.len() - c_len && end == 0 { end = i + c_len; } if end > 0 { let esc_trim = escaped.trim(); let inp = &input[unescaped_start..end]; if !(esc_trim.is_empty() && inp.trim().is_empty()) { if esc_trim.is_empty() { words.push(inp.into()); parts.push(inp); } else { words.push([escaped, inp.into()].concat().into()); parts.push(&input[part_start..end]); escaped = "".to_string(); } } unescaped_start = i + 1; part_start = i + 1; end = 0; } } debug_assert!(words.len() == parts.len()); Self { state, words, parts, } } } impl<'a> Shellwords<'a> { /// Checks that the input ends with a whitespace character which is not escaped. /// /// # Examples /// /// ```rust /// use helix_core::shellwords::Shellwords; /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true); /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true); /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true); /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false); /// #[cfg(unix)] /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false); /// #[cfg(unix)] /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false); /// ``` pub fn ends_with_whitespace(&self) -> bool { matches!(self.state, State::OnWhitespace) } /// Returns the list of shellwords calculated from the input string. pub fn words(&self) -> &[Cow<'a, str>] { &self.words } /// Returns a list of strings which correspond to [`Self::words`] but represent the original /// text in the input string - including escape characters - without separating whitespace. pub fn parts(&self) -> &[&'a str] { &self.parts } } #[cfg(test)] mod test { use super::*; #[test] #[cfg(windows)] fn test_normal() { let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#; let shellwords = Shellwords::from(input); let result = shellwords.words().to_vec(); let expected = vec![ Cow::from(":o"), Cow::from("single_word"), Cow::from("twó"), Cow::from("wörds"), Cow::from("\\three\\"), Cow::from("\\"), Cow::from("with\\ escaping\\\\"), ]; // TODO test is_owned and is_borrowed, once they get stabilized. assert_eq!(expected, result); } #[test] #[cfg(unix)] fn test_normal() { let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#; let shellwords = Shellwords::from(input); let result = shellwords.words().to_vec(); let expected = vec![ Cow::from(":o"), Cow::from("single_word"), Cow::from("twó"), Cow::from("wörds"), Cow::from(r#"three "with escaping\"#), ]; // TODO test is_owned and is_borrowed, once they get stabilized. assert_eq!(expected, result); } #[test] #[cfg(unix)] fn test_quoted() { let quoted = r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#; let shellwords = Shellwords::from(quoted); let result = shellwords.words().to_vec(); let expected = vec![ Cow::from(":o"), Cow::from("single_word"), Cow::from("twó wörds"), Cow::from(r#"three' "with escaping\"#), Cow::from("quote incomplete"), ]; assert_eq!(expected, result); } #[test] #[cfg(unix)] fn test_dquoted() { let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#; let shellwords = Shellwords::from(dquoted); let result = shellwords.words().to_vec(); let expected = vec![ Cow::from(":o"), Cow::from("single_word"), Cow::from("twó wörds"), Cow::from(r#"three' "with escaping\"#), Cow::from("dquote incomplete"), ]; assert_eq!(expected, result); } #[test] #[cfg(unix)] fn test_mixed() { let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#; let shellwords = Shellwords::from(dquoted); let result = shellwords.words().to_vec(); let expected = vec![ Cow::from(":o"), Cow::from("single_word"), Cow::from("twó wörds"), Cow::from("three' \"with escaping\\"), Cow::from("no space before"), Cow::from("and after"), Cow::from("$#%^@"), Cow::from("%^&(%^"), Cow::from(")(*&^%"), Cow::from(r#"a\\b"#), //last ' just changes to quoted but since we dont have anything after it, it should be ignored ]; assert_eq!(expected, result); } #[test] fn test_lists() { let input = r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "quotes"]'"#; let shellwords = Shellwords::from(input); let result = shellwords.words().to_vec(); let expected = vec![ Cow::from(":set"), Cow::from("statusline.center"), Cow::from(r#"["file-type","file-encoding"]"#), Cow::from(r#"["list", "in", "quotes"]"#), ]; assert_eq!(expected, result); } #[test] #[cfg(unix)] fn test_escaping_unix() { assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar")); assert_eq!(escape("foo bar".into()), Cow::Borrowed("foo\\ bar")); assert_eq!(escape("foo\tbar".into()), Cow::Borrowed("foo\\\tbar")); } #[test] #[cfg(windows)] fn test_escaping_windows() { assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar")); assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\"")); } #[test] #[cfg(unix)] fn test_parts() { assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]); assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\ "]); } #[test] #[cfg(windows)] fn test_parts() { assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]); assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]); } #[test] fn test_multibyte_at_end() { assert_eq!(Shellwords::from("𒀀").parts(), &["𒀀"]); assert_eq!( Shellwords::from(":sh echo 𒀀").parts(), &[":sh", "echo", "𒀀"] ); assert_eq!( Shellwords::from(":sh echo 𒀀 hello world𒀀").parts(), &[":sh", "echo", "𒀀", "hello", "world𒀀"] ); } }