about | summary | refs | log | tree | commit | diff
path: root/helix-core
diff options
context:
space:
mode:
author: Kirawi, 2021-06-08 04:20:15 +0000
committer: GitHub, 2021-06-08 04:20:15 +0000
commit: b873fb9897bb5b24a60cca3d9fa69285446a857f (patch)
tree: fe13df2f3f57990f5e6ef93b0049457855d4ed76 /helix-core
parent: 8f1eb7b2b03fd6907307f4e0065d0c43da22edb3 (diff)
Fix Unicode (#135)
* init
* wip
* wip
* fix unicode break
* fix unicode break
* Update helix-core/src/transaction.rs

  Co-authored-by: Benoît Cortier <benoit.cortier@fried-world.eu>

* clippy
* fix
* add changes
* added test
* wip
* wip
* wip
* wip
* fix
* fix view
* fix #88

Co-authored-by: Benoît Cortier <benoit.cortier@fried-world.eu>
Diffstat (limited to 'helix-core')
-rw-r--r-- helix-core/Cargo.toml | 1
-rw-r--r-- helix-core/src/movement.rs | 85
-rw-r--r-- helix-core/src/transaction.rs | 2
-rw-r--r-- helix-core/src/words.rs | 14
4 files changed, 81 insertions, 21 deletions
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml
index c544e108..2d7a6ae8 100644
--- a/helix-core/Cargo.toml
+++ b/helix-core/Cargo.toml
@@ -19,6 +19,7 @@ smallvec = "1.4"
tendril = "0.4.2"
unicode-segmentation = "1.6"
unicode-width = "0.1"
+unicode-general-category = "0.4.0"
# slab = "0.4.2"
tree-sitter = "0.19"
once_cell = "1.4"
diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs
index 96bbd54b..9d62b16c 100644
--- a/helix-core/src/movement.rs
+++ b/helix-core/src/movement.rs
@@ -88,11 +88,11 @@ pub fn move_next_word_start(slice: RopeSlice, mut begin: usize, count: usize) ->
if is_word(ch) {
skip_over_next(slice, &mut end, is_word);
- } else if ch.is_ascii_punctuation() {
- skip_over_next(slice, &mut end, |ch| ch.is_ascii_punctuation());
+ } else if is_punctuation(ch) {
+ skip_over_next(slice, &mut end, is_punctuation);
}
- skip_over_next(slice, &mut end, is_horiz_blank);
+ skip_over_next(slice, &mut end, char::is_whitespace);
}
Some(Range::new(begin, end - 1))
@@ -119,15 +119,15 @@ pub fn move_prev_word_start(slice: RopeSlice, mut begin: usize, count: usize) ->
end = begin;
- with_end = skip_over_prev(slice, &mut end, is_horiz_blank);
+ with_end = skip_over_prev(slice, &mut end, char::is_whitespace);
// refetch
let ch = slice.char(end);
if is_word(ch) {
with_end = skip_over_prev(slice, &mut end, is_word);
- } else if ch.is_ascii_punctuation() {
- with_end = skip_over_prev(slice, &mut end, |ch| ch.is_ascii_punctuation());
+ } else if is_punctuation(ch) {
+ with_end = skip_over_prev(slice, &mut end, is_punctuation);
}
}
@@ -155,15 +155,15 @@ pub fn move_next_word_end(slice: RopeSlice, mut begin: usize, count: usize) -> O
end = begin;
- skip_over_next(slice, &mut end, is_horiz_blank);
+ skip_over_next(slice, &mut end, char::is_whitespace);
// refetch
let ch = slice.char(end);
if is_word(ch) {
skip_over_next(slice, &mut end, is_word);
- } else if ch.is_ascii_punctuation() {
- skip_over_next(slice, &mut end, |ch| ch.is_ascii_punctuation());
+ } else if is_punctuation(ch) {
+ skip_over_next(slice, &mut end, is_punctuation);
}
}
@@ -174,12 +174,28 @@ pub fn move_next_word_end(slice: RopeSlice, mut begin: usize, count: usize) -> O
// used for by-word movement
+#[inline]
pub(crate) fn is_word(ch: char) -> bool {
ch.is_alphanumeric() || ch == '_'
}
-pub(crate) fn is_horiz_blank(ch: char) -> bool {
- matches!(ch, ' ' | '\t')
+#[inline]
+pub(crate) fn is_punctuation(ch: char) -> bool {
+ use unicode_general_category::{get_general_category, GeneralCategory};
+
+ matches!(
+ get_general_category(ch),
+ GeneralCategory::OtherPunctuation
+ | GeneralCategory::OpenPunctuation
+ | GeneralCategory::ClosePunctuation
+ | GeneralCategory::InitialPunctuation
+ | GeneralCategory::FinalPunctuation
+ | GeneralCategory::ConnectorPunctuation
+ | GeneralCategory::DashPunctuation
+ | GeneralCategory::MathSymbol
+ | GeneralCategory::CurrencySymbol
+ | GeneralCategory::ModifierSymbol
+ )
}
#[derive(Debug, Eq, PartialEq)]
@@ -191,14 +207,15 @@ pub(crate) enum Category {
Unknown,
}
+#[inline]
pub(crate) fn categorize(ch: char) -> Category {
if ch == '\n' {
Category::Eol
- } else if ch.is_ascii_whitespace() {
+ } else if ch.is_whitespace() {
Category::Whitespace
} else if is_word(ch) {
Category::Word
- } else if ch.is_ascii_punctuation() {
+ } else if is_punctuation(ch) {
Category::Punctuation
} else {
Category::Unknown
@@ -213,6 +230,7 @@ where
{
let mut chars = slice.chars_at(*pos);
+ #[allow(clippy::while_let_on_iterator)]
while let Some(ch) = chars.next() {
if !fun(ch) {
break;
@@ -231,6 +249,7 @@ where
// need to +1 so that prev() includes current char
let mut chars = slice.chars_at(*pos + 1);
+ #[allow(clippy::while_let_on_iterator)]
while let Some(ch) = chars.prev() {
if !fun(ch) {
break;
@@ -259,4 +278,44 @@ mod test {
(1, 2).into()
);
}
+
+ #[test]
+ fn test_categorize() {
+ const WORD_TEST_CASE: &'static str =
+ "_hello_world_あいうえおー12345678901234567890";
+ const PUNCTUATION_TEST_CASE: &'static str = "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~";
+ const WHITESPACE_TEST_CASE: &'static str = "      ";
+
+ assert_eq!(Category::Eol, categorize('\n'));
+
+ for ch in WHITESPACE_TEST_CASE.chars() {
+ assert_eq!(
+ Category::Whitespace,
+ categorize(ch),
+ "Testing '{}', but got `{:?}` instead of `Category::Whitespace`",
+ ch,
+ categorize(ch)
+ );
+ }
+
+ for ch in WORD_TEST_CASE.chars() {
+ assert_eq!(
+ Category::Word,
+ categorize(ch),
+ "Testing '{}', but got `{:?}` instead of `Category::Word`",
+ ch,
+ categorize(ch)
+ );
+ }
+
+ for ch in PUNCTUATION_TEST_CASE.chars() {
+ assert_eq!(
+ Category::Punctuation,
+ categorize(ch),
+ "Testing '{}', but got `{:?}` instead of `Category::Punctuation`",
+ ch,
+ categorize(ch)
+ );
+ }
+ }
}
diff --git a/helix-core/src/transaction.rs b/helix-core/src/transaction.rs
index 085f40b7..3a719628 100644
--- a/helix-core/src/transaction.rs
+++ b/helix-core/src/transaction.rs
@@ -758,7 +758,7 @@ mod test {
#[test]
fn combine_with_utf8() {
- const TEST_CASE: &'static str = "Hello, これはヒレクスエディターです!";
+ const TEST_CASE: &'static str = "Hello, これはヘリックスエディターです!";
let empty = Rope::from("");
let mut a = ChangeSet::new(&empty);
diff --git a/helix-core/src/words.rs b/helix-core/src/words.rs
index 5ff25050..2cbd88d4 100644
--- a/helix-core/src/words.rs
+++ b/helix-core/src/words.rs
@@ -1,4 +1,4 @@
-use crate::movement::{categorize, is_horiz_blank, is_word, skip_over_prev};
+use crate::movement::{categorize, is_punctuation, is_word, skip_over_prev};
use ropey::RopeSlice;
#[must_use]
@@ -13,15 +13,15 @@ pub fn nth_prev_word_boundary(slice: RopeSlice, mut char_idx: usize, count: usiz
// return if not skip while?
skip_over_prev(slice, &mut char_idx, |ch| ch == '\n');
- with_end = skip_over_prev(slice, &mut char_idx, is_horiz_blank);
+ with_end = skip_over_prev(slice, &mut char_idx, char::is_whitespace);
// refetch
let ch = slice.char(char_idx);
if is_word(ch) {
with_end = skip_over_prev(slice, &mut char_idx, is_word);
- } else if ch.is_ascii_punctuation() {
- with_end = skip_over_prev(slice, &mut char_idx, |ch| ch.is_ascii_punctuation());
+ } else if is_punctuation(ch) {
+ with_end = skip_over_prev(slice, &mut char_idx, is_punctuation);
}
}
@@ -47,11 +47,11 @@ fn different_prev_word_boundary() {
t("hello, world", "hello, ");
t("hello, ", "hello");
t("hello", "");
- t("こんにちは、世界!", "こんにちは、世界!"); // TODO: punctuation
+ t("こんにちは、世界!", "こんにちは、世界");
t("こんにちは、世界", "こんにちは、");
- t("こんにちは、", "こんにちは、"); // what?
+ t("こんにちは、", "こんにちは");
t("こんにちは", "");
- t("この世界。", "この世界。"); // what?
+ t("この世界。", "この世界");
t("この世界", "");
t("お前はもう死んでいる", "");
t("その300円です", ""); // TODO: should stop at 300