summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- helix-core/src/chars.rs 41
-rw-r--r-- helix-core/src/lib.rs 1
-rw-r--r-- helix-view/src/document.rs 52
3 files changed, 44 insertions, 50 deletions
diff --git a/helix-core/src/chars.rs b/helix-core/src/chars.rs
new file mode 100644
index 00000000..243a1374
--- /dev/null
+++ b/helix-core/src/chars.rs
@@ -0,0 +1,41 @@
+/// Returns `true` if `c` is a line-break character: LF, VT, FF, CR, NEL, LS, or PS (matched below).
+pub fn char_is_linebreak(c: char) -> bool {
+ matches!(
+ c,
+ '\u{000A}' | // Line Feed
+ '\u{000B}' | // Vertical Tab
+ '\u{000C}' | // Form Feed
+ '\u{000D}' | // Carriage Return
+ '\u{0085}' | // Next Line
+ '\u{2028}' | // Line Separator
+ '\u{2029}' // Paragraph Separator
+ )
+}
+
+/// Returns `true` if `c` is a whitespace character other than a line break:
+/// tab, space, or one of the Unicode space characters matched below.
+pub fn char_is_whitespace(c: char) -> bool {
+ // TODO: this is a naive yes/no categorization of whitespace
+ // characters. For display, word wrapping, etc. we'll need a finer
+ // categorization that distinguishes e.g. breaking from non-breaking
+ // spaces and zero-width spaces from those that occupy width.
+ match c {
+ // U+1680 Ogham Space Mark is not matched: it is usually displayed as a dash, not as whitespace.
+ '\u{0009}' | // Character Tabulation
+ '\u{0020}' | // Space
+ '\u{00A0}' | // No-break Space
+ '\u{180E}' | // Mongolian Vowel Separator
+ '\u{202F}' | // Narrow No-break Space
+ '\u{205F}' | // Medium Mathematical Space
+ '\u{3000}' | // Ideographic Space
+ '\u{FEFF}' // Zero Width No-break Space
+ => true,
+
+ // U+2000..=U+200B: En Quad, Em Quad, En Space, Em Space,
+ // Three-per-em Space, Four-per-em Space, Six-per-em Space,
+ // Figure Space, Punctuation Space, Thin Space, Hair Space, Zero Width Space.
+ c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,
+
+ _ => false,
+ }
+}
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index 79a22547..b11faeab 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -1,5 +1,6 @@
#![allow(unused)]
pub mod auto_pairs;
+pub mod chars;
pub mod comment;
pub mod diagnostic;
pub mod graphemes;
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index d5ab1425..f4e4b7c6 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -5,6 +5,7 @@ use std::path::{Component, Path, PathBuf};
use std::sync::Arc;
use helix_core::{
+ chars::{char_is_linebreak, char_is_whitespace},
history::History,
syntax::{LanguageConfiguration, LOADER},
ChangeSet, Diagnostic, Rope, Selection, State, Syntax, Transaction,
@@ -277,55 +278,6 @@ impl Document {
}
fn detect_indent_style(&mut self) {
- // Determine whether a character is a line break.
- //
- // TODO: this is probably a generally useful utility function. Where
- // should we put it?
- fn char_is_linebreak(c: char) -> bool {
- [
- '\u{000A}', // LineFeed
- '\u{000B}', // VerticalTab
- '\u{000C}', // FormFeed
- '\u{000D}', // CarriageReturn
- '\u{0085}', // NextLine
- '\u{2028}', // Line Separator
- '\u{2029}', // ParagraphSeparator
- ]
- .contains(&c)
- }
-
- // Determine whether a character qualifies as (non-line-break)
- // whitespace.
- //
- // TODO: this is probably a generally useful utility function. Where
- // should we put it?
- //
- // TODO: this is a naive binary categorization of whitespace
- // characters. For display, word wrapping, etc. we'll need a better
- // categorization based on e.g. breaking vs non-breaking spaces
- // and whether they're zero-width or not.
- pub fn char_is_whitespace(c: char) -> bool {
- match c {
- //'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace)
- '\u{0009}' | // Character Tabulation
- '\u{0020}' | // Space
- '\u{00A0}' | // No-break Space
- '\u{180E}' | // Mongolian Vowel Separator
- '\u{202F}' | // Narrow No-break Space
- '\u{205F}' | // Medium Mathematical Space
- '\u{3000}' | // Ideographic Space
- '\u{FEFF}' // Zero Width No-break Space
- => true,
-
- // En Quad, Em Quad, En Space, Em Space, Three-per-em Space,
- // Four-per-em Space, Six-per-em Space, Figure Space,
- // Punctuation Space, Thin Space, Hair Space, Zero Width Space.
- c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,
-
- _ => false,
- }
- }
-
// Build a histogram of the indentation *increases* between
// subsequent lines, ignoring lines that are all whitespace.
//
@@ -689,7 +641,7 @@ impl Document {
///
/// TODO: we might not need this function anymore, since the information
/// is conveniently available in `Document::indent_style` now.
- pub fn indent_unit(&self) -> &str {
+ pub fn indent_unit(&self) -> &'static str {
match self.indent_style {
IndentStyle::Tabs => "\t",
IndentStyle::Spaces(1) => " ",