summaryrefslogtreecommitdiff
path: root/helix-core/src/shellwords.rs
diff options
context:
space:
mode:
Diffstat (limited to 'helix-core/src/shellwords.rs')
-rw-r--r--helix-core/src/shellwords.rs331
1 files changed, 171 insertions, 160 deletions
diff --git a/helix-core/src/shellwords.rs b/helix-core/src/shellwords.rs
index 7742896c..9475f5e5 100644
--- a/helix-core/src/shellwords.rs
+++ b/helix-core/src/shellwords.rs
@@ -27,181 +27,172 @@ enum State {
DquoteEscaped,
}
-/// Get the vec of escaped / quoted / doublequoted filenames from the input str
-pub fn shellwords(input: &str) -> Vec<Cow<'_, str>> {
- use State::*;
+pub struct Shellwords<'a> {
+ state: State,
+ /// Shellwords where whitespace and escapes has been resolved.
+ words: Vec<Cow<'a, str>>,
+ /// The parts of the input that are divided into shellwords. This can be
+ /// used to retrieve the original text for a given word by looking up the
+ /// same index in the Vec as the word in `words`.
+ parts: Vec<&'a str>,
+}
- let mut state = Unquoted;
- let mut args: Vec<Cow<str>> = Vec::new();
- let mut escaped = String::with_capacity(input.len());
+impl<'a> From<&'a str> for Shellwords<'a> {
+ fn from(input: &'a str) -> Self {
+ use State::*;
- let mut start = 0;
- let mut end = 0;
+ let mut state = Unquoted;
+ let mut words = Vec::new();
+ let mut parts = Vec::new();
+ let mut escaped = String::with_capacity(input.len());
- for (i, c) in input.char_indices() {
- state = match state {
- OnWhitespace => match c {
- '"' => {
- end = i;
- Dquoted
- }
- '\'' => {
- end = i;
- Quoted
- }
- '\\' => {
- if cfg!(unix) {
- escaped.push_str(&input[start..i]);
- start = i + 1;
- UnquotedEscaped
- } else {
+ let mut part_start = 0;
+ let mut unescaped_start = 0;
+ let mut end = 0;
+
+ for (i, c) in input.char_indices() {
+ state = match state {
+ OnWhitespace => match c {
+ '"' => {
+ end = i;
+ Dquoted
+ }
+ '\'' => {
+ end = i;
+ Quoted
+ }
+ '\\' => {
+ if cfg!(unix) {
+ escaped.push_str(&input[unescaped_start..i]);
+ unescaped_start = i + 1;
+ UnquotedEscaped
+ } else {
+ OnWhitespace
+ }
+ }
+ c if c.is_ascii_whitespace() => {
+ end = i;
OnWhitespace
}
- }
- c if c.is_ascii_whitespace() => {
- end = i;
- OnWhitespace
- }
- _ => Unquoted,
- },
- Unquoted => match c {
- '\\' => {
- if cfg!(unix) {
- escaped.push_str(&input[start..i]);
- start = i + 1;
- UnquotedEscaped
- } else {
- Unquoted
+ _ => Unquoted,
+ },
+ Unquoted => match c {
+ '\\' => {
+ if cfg!(unix) {
+ escaped.push_str(&input[unescaped_start..i]);
+ unescaped_start = i + 1;
+ UnquotedEscaped
+ } else {
+ Unquoted
+ }
}
- }
- c if c.is_ascii_whitespace() => {
- end = i;
- OnWhitespace
- }
- _ => Unquoted,
- },
- UnquotedEscaped => Unquoted,
- Quoted => match c {
- '\\' => {
- if cfg!(unix) {
- escaped.push_str(&input[start..i]);
- start = i + 1;
- QuoteEscaped
- } else {
- Quoted
+ c if c.is_ascii_whitespace() => {
+ end = i;
+ OnWhitespace
}
- }
- '\'' => {
- end = i;
- OnWhitespace
- }
- _ => Quoted,
- },
- QuoteEscaped => Quoted,
- Dquoted => match c {
- '\\' => {
- if cfg!(unix) {
- escaped.push_str(&input[start..i]);
- start = i + 1;
- DquoteEscaped
- } else {
- Dquoted
+ _ => Unquoted,
+ },
+ UnquotedEscaped => Unquoted,
+ Quoted => match c {
+ '\\' => {
+ if cfg!(unix) {
+ escaped.push_str(&input[unescaped_start..i]);
+ unescaped_start = i + 1;
+ QuoteEscaped
+ } else {
+ Quoted
+ }
}
- }
- '"' => {
- end = i;
- OnWhitespace
- }
- _ => Dquoted,
- },
- DquoteEscaped => Dquoted,
- };
+ '\'' => {
+ end = i;
+ OnWhitespace
+ }
+ _ => Quoted,
+ },
+ QuoteEscaped => Quoted,
+ Dquoted => match c {
+ '\\' => {
+ if cfg!(unix) {
+ escaped.push_str(&input[unescaped_start..i]);
+ unescaped_start = i + 1;
+ DquoteEscaped
+ } else {
+ Dquoted
+ }
+ }
+ '"' => {
+ end = i;
+ OnWhitespace
+ }
+ _ => Dquoted,
+ },
+ DquoteEscaped => Dquoted,
+ };
- if i >= input.len() - 1 && end == 0 {
- end = i + 1;
- }
+ if i >= input.len() - 1 && end == 0 {
+ end = i + 1;
+ }
- if end > 0 {
- let esc_trim = escaped.trim();
- let inp = &input[start..end];
+ if end > 0 {
+ let esc_trim = escaped.trim();
+ let inp = &input[unescaped_start..end];
- if !(esc_trim.is_empty() && inp.trim().is_empty()) {
- if esc_trim.is_empty() {
- args.push(inp.into());
- } else {
- args.push([escaped, inp.into()].concat().into());
- escaped = "".to_string();
+ if !(esc_trim.is_empty() && inp.trim().is_empty()) {
+ if esc_trim.is_empty() {
+ words.push(inp.into());
+ parts.push(inp);
+ } else {
+ words.push([escaped, inp.into()].concat().into());
+ parts.push(&input[part_start..end]);
+ escaped = "".to_string();
+ }
}
+ unescaped_start = i + 1;
+ part_start = i + 1;
+ end = 0;
}
- start = i + 1;
- end = 0;
}
- }
- args
-}
-/// Checks that the input ends with an ascii whitespace character which is
-/// not escaped.
-///
-/// # Examples
-///
-/// ```rust
-/// use helix_core::shellwords::ends_with_whitespace;
-/// assert_eq!(ends_with_whitespace(" "), true);
-/// assert_eq!(ends_with_whitespace(":open "), true);
-/// assert_eq!(ends_with_whitespace(":open foo.txt "), true);
-/// assert_eq!(ends_with_whitespace(":open"), false);
-/// #[cfg(unix)]
-/// assert_eq!(ends_with_whitespace(":open a\\ "), false);
-/// #[cfg(unix)]
-/// assert_eq!(ends_with_whitespace(":open a\\ b.txt"), false);
-/// ```
-pub fn ends_with_whitespace(input: &str) -> bool {
- use State::*;
+ debug_assert!(words.len() == parts.len());
- // Fast-lane: the input must end with a whitespace character
- // regardless of quoting.
- if !input.ends_with(|c: char| c.is_ascii_whitespace()) {
- return false;
+ Self {
+ state,
+ words,
+ parts,
+ }
}
+}
- let mut state = Unquoted;
+impl<'a> Shellwords<'a> {
+ /// Checks that the input ends with a whitespace character which is not escaped.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use helix_core::shellwords::Shellwords;
+ /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
+ /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
+ /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
+ /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
+ /// #[cfg(unix)]
+ /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);
+ /// #[cfg(unix)]
+ /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
+ /// ```
+ pub fn ends_with_whitespace(&self) -> bool {
+ matches!(self.state, State::OnWhitespace)
+ }
- for c in input.chars() {
- state = match state {
- OnWhitespace => match c {
- '"' => Dquoted,
- '\'' => Quoted,
- '\\' if cfg!(unix) => UnquotedEscaped,
- '\\' => OnWhitespace,
- c if c.is_ascii_whitespace() => OnWhitespace,
- _ => Unquoted,
- },
- Unquoted => match c {
- '\\' if cfg!(unix) => UnquotedEscaped,
- '\\' => Unquoted,
- c if c.is_ascii_whitespace() => OnWhitespace,
- _ => Unquoted,
- },
- UnquotedEscaped => Unquoted,
- Quoted => match c {
- '\\' if cfg!(unix) => QuoteEscaped,
- '\\' => Quoted,
- '\'' => OnWhitespace,
- _ => Quoted,
- },
- QuoteEscaped => Quoted,
- Dquoted => match c {
- '\\' if cfg!(unix) => DquoteEscaped,
- '\\' => Dquoted,
- '"' => OnWhitespace,
- _ => Dquoted,
- },
- DquoteEscaped => Dquoted,
- }
+ /// Returns the list of shellwords calculated from the input string.
+ pub fn words(&self) -> &[Cow<'a, str>] {
+ &self.words
}
- matches!(state, OnWhitespace)
+ /// Returns a list of strings which correspond to [`Self::words`] but represent the original
+ /// text in the input string - including escape characters - without separating whitespace.
+ pub fn parts(&self) -> &[&'a str] {
+ &self.parts
+ }
}
#[cfg(test)]
@@ -212,7 +203,8 @@ mod test {
#[cfg(windows)]
fn test_normal() {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;
- let result = shellwords(input);
+ let shellwords = Shellwords::from(input);
+ let result = shellwords.words().to_vec();
let expected = vec![
Cow::from(":o"),
Cow::from("single_word"),
@@ -230,7 +222,8 @@ mod test {
#[cfg(unix)]
fn test_normal() {
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;
- let result = shellwords(input);
+ let shellwords = Shellwords::from(input);
+ let result = shellwords.words().to_vec();
let expected = vec![
Cow::from(":o"),
Cow::from("single_word"),
@@ -247,7 +240,8 @@ mod test {
fn test_quoted() {
let quoted =
r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#;
- let result = shellwords(quoted);
+ let shellwords = Shellwords::from(quoted);
+ let result = shellwords.words().to_vec();
let expected = vec![
Cow::from(":o"),
Cow::from("single_word"),
@@ -262,7 +256,8 @@ mod test {
#[cfg(unix)]
fn test_dquoted() {
let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#;
- let result = shellwords(dquoted);
+ let shellwords = Shellwords::from(dquoted);
+ let result = shellwords.words().to_vec();
let expected = vec![
Cow::from(":o"),
Cow::from("single_word"),
@@ -277,7 +272,8 @@ mod test {
#[cfg(unix)]
fn test_mixed() {
let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#;
- let result = shellwords(dquoted);
+ let shellwords = Shellwords::from(dquoted);
+ let result = shellwords.words().to_vec();
let expected = vec![
Cow::from(":o"),
Cow::from("single_word"),
@@ -298,7 +294,8 @@ mod test {
fn test_lists() {
let input =
r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "qoutes"]'"#;
- let result = shellwords(input);
+ let shellwords = Shellwords::from(input);
+ let result = shellwords.words().to_vec();
let expected = vec![
Cow::from(":set"),
Cow::from("statusline.center"),
@@ -322,4 +319,18 @@ mod test {
assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));
assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\""));
}
+
+ #[test]
+ #[cfg(unix)]
+ fn test_parts() {
+ assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);
+ assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\ "]);
+ }
+
+ #[test]
+ #[cfg(windows)]
+ fn test_parts() {
+ assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);
+ assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]);
+ }
}