Address PR comments.

- Move char functions into their own module under helix_core.
- Use matches!() macro where appropriate.
- Use a static lifetime on indent_unit() now that we can.
author: Nathan Vegdahl 2021-06-14 02:13:31 +0000
committer: Nathan Vegdahl 2021-06-15 01:32:23 +0000
commit: 0a5580aa21b55947859191a4f33244d77fb794ed (patch)
tree: f93dc151c4848781268f0888666c9a1bc29a080f
parent: 358ea6a37ccc3ee98f3680c9b4ee0dd0aa0781d2 (diff)
3 files changed, 44 insertions, 50 deletions
diff --git a/helix-core/src/chars.rs b/helix-core/src/chars.rs
new file mode 100644
index 00000000..243a1374
--- /dev/null
+++ b/helix-core/src/chars.rs
@@ -0,0 +1,41 @@
+/// Returns `true` if `c` is one of the line-break characters listed below.
+pub fn char_is_linebreak(c: char) -> bool {
+    matches!(
+        c,
+        '\u{000A}' | // Line Feed (LF)
+        '\u{000B}' | // Vertical Tab (VT)
+        '\u{000C}' | // Form Feed (FF)
+        '\u{000D}' | // Carriage Return (CR)
+        '\u{0085}' | // Next Line (NEL)
+        '\u{2028}' | // Line Separator
+        '\u{2029}' // Paragraph Separator
+    )
+}
+
+/// Returns `true` if `c` is one of the non-line-break whitespace
+/// characters listed below; line-break characters are not matched here.
+pub fn char_is_whitespace(c: char) -> bool {
+    // TODO: this is a naive binary categorization of whitespace
+    // characters.  For display, word wrapping, etc. we'll need a better
+    // categorization based on e.g. breaking vs non-breaking spaces
+    // and whether they're zero-width or not.
+    match c {
+        // Deliberately disabled: '\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace)
+        '\u{0009}' | // Character Tabulation
+        '\u{0020}' | // Space
+        '\u{00A0}' | // No-break Space
+        '\u{180E}' | // Mongolian Vowel Separator
+        '\u{202F}' | // Narrow No-break Space
+        '\u{205F}' | // Medium Mathematical Space
+        '\u{3000}' | // Ideographic Space
+        '\u{FEFF}'   // Zero Width No-break Space
+        => true,
+
+        // U+2000..=U+200B, in order: En Quad, Em Quad, En Space, Em Space,
+        // Three-per-em Space, Four-per-em Space, Six-per-em Space, Figure Space,
+        // Punctuation Space, Thin Space, Hair Space, Zero Width Space.
+        c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,
+
+        _ => false,
+    }
+}
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index 79a22547..b11faeab 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -1,5 +1,6 @@
 #![allow(unused)]
 pub mod auto_pairs;
+pub mod chars;
 pub mod comment;
 pub mod diagnostic;
 pub mod graphemes;
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index d5ab1425..f4e4b7c6 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -5,6 +5,7 @@ use std::path::{Component, Path, PathBuf};
 use std::sync::Arc;
 
 use helix_core::{
+    chars::{char_is_linebreak, char_is_whitespace},
     history::History,
     syntax::{LanguageConfiguration, LOADER},
     ChangeSet, Diagnostic, Rope, Selection, State, Syntax, Transaction,
@@ -277,55 +278,6 @@ impl Document {
     }
 
     fn detect_indent_style(&mut self) {
-        // Determine whether a character is a line break.
-        //
-        // TODO: this is probably a generally useful utility function.  Where
-        // should we put it?
-        fn char_is_linebreak(c: char) -> bool {
-            [
-                '\u{000A}', // LineFeed
-                '\u{000B}', // VerticalTab
-                '\u{000C}', // FormFeed
-                '\u{000D}', // CarriageReturn
-                '\u{0085}', // NextLine
-                '\u{2028}', // Line Separator
-                '\u{2029}', // ParagraphSeparator
-            ]
-            .contains(&c)
-        }
-
-        // Determine whether a character qualifies as (non-line-break)
-        // whitespace.
-        //
-        // TODO: this is probably a generally useful utility function.  Where
-        // should we put it?
-        //
-        // TODO: this is a naive binary categorization of whitespace
-        // characters.  For display, word wrapping, etc. we'll need a better
-        // categorization based on e.g. breaking vs non-breaking spaces
-        // and whether they're zero-width or not.
-        pub fn char_is_whitespace(c: char) -> bool {
-            match c {
-                //'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace)
-                '\u{0009}' | // Character Tabulation
-                '\u{0020}' | // Space
-                '\u{00A0}' | // No-break Space
-                '\u{180E}' | // Mongolian Vowel Separator
-                '\u{202F}' | // Narrow No-break Space
-                '\u{205F}' | // Medium Mathematical Space
-                '\u{3000}' | // Ideographic Space
-                '\u{FEFF}'   // Zero Width No-break Space
-                => true,
-
-                // En Quad, Em Quad, En Space, Em Space, Three-per-em Space,
-                // Four-per-em Space, Six-per-em Space, Figure Space,
-                // Punctuation Space, Thin Space, Hair Space, Zero Width Space.
-                c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,
-
-                _ => false,
-            }
-        }
-
         // Build a histogram of the indentation *increases* between
         // subsequent lines, ignoring lines that are all whitespace.
         //
@@ -689,7 +641,7 @@ impl Document {
     ///
     /// TODO: we might not need this function anymore, since the information
     /// is conveniently available in `Document::indent_style` now.
-    pub fn indent_unit(&self) -> &str {
+    pub fn indent_unit(&self) -> &'static str {
         match self.indent_style {
             IndentStyle::Tabs => "\t",
             IndentStyle::Spaces(1) => " ",
author	Nathan Vegdahl	2021-06-14 02:13:31 +0000
committer	Nathan Vegdahl	2021-06-15 01:32:23 +0000
commit	0a5580aa21b55947859191a4f33244d77fb794ed (patch)
tree	f93dc151c4848781268f0888666c9a1bc29a080f
parent	358ea6a37ccc3ee98f3680c9b4ee0dd0aa0781d2 (diff)