summaryrefslogtreecommitdiff
path: root/helix-tui/src/widgets/reflow.rs
diff options
context:
space:
mode:
Diffstat (limited to 'helix-tui/src/widgets/reflow.rs')
-rw-r--r--helix-tui/src/widgets/reflow.rs534
1 files changed, 534 insertions, 0 deletions
diff --git a/helix-tui/src/widgets/reflow.rs b/helix-tui/src/widgets/reflow.rs
new file mode 100644
index 00000000..94ff7330
--- /dev/null
+++ b/helix-tui/src/widgets/reflow.rs
@@ -0,0 +1,534 @@
+use crate::text::StyledGrapheme;
+use unicode_segmentation::UnicodeSegmentation;
+use unicode_width::UnicodeWidthStr;
+
+const NBSP: &str = "\u{00a0}";
+
+/// A state machine to pack styled symbols into lines.
+/// Cannot implement it as Iterator since it yields slices of the internal buffer (need streaming
+/// iterators for that).
+pub trait LineComposer<'a> {
+ fn next_line(&mut self) -> Option<(&[StyledGrapheme<'a>], u16)>;
+}
+
+/// A state machine that wraps lines on word boundaries.
+pub struct WordWrapper<'a, 'b> {
+ symbols: &'b mut dyn Iterator<Item = StyledGrapheme<'a>>,
+ max_line_width: u16,
+ current_line: Vec<StyledGrapheme<'a>>,
+ next_line: Vec<StyledGrapheme<'a>>,
+ /// Removes the leading whitespace from lines
+ trim: bool,
+}
+
+impl<'a, 'b> WordWrapper<'a, 'b> {
+ pub fn new(
+ symbols: &'b mut dyn Iterator<Item = StyledGrapheme<'a>>,
+ max_line_width: u16,
+ trim: bool,
+ ) -> WordWrapper<'a, 'b> {
+ WordWrapper {
+ symbols,
+ max_line_width,
+ current_line: vec![],
+ next_line: vec![],
+ trim,
+ }
+ }
+}
+
+impl<'a, 'b> LineComposer<'a> for WordWrapper<'a, 'b> {
+ fn next_line(&mut self) -> Option<(&[StyledGrapheme<'a>], u16)> {
+ if self.max_line_width == 0 {
+ return None;
+ }
+ std::mem::swap(&mut self.current_line, &mut self.next_line);
+ self.next_line.truncate(0);
+
+ let mut current_line_width = self
+ .current_line
+ .iter()
+ .map(|StyledGrapheme { symbol, .. }| symbol.width() as u16)
+ .sum();
+
+ let mut symbols_to_last_word_end: usize = 0;
+ let mut width_to_last_word_end: u16 = 0;
+ let mut prev_whitespace = false;
+ let mut symbols_exhausted = true;
+ for StyledGrapheme { symbol, style } in &mut self.symbols {
+ symbols_exhausted = false;
+ let symbol_whitespace = symbol.chars().all(&char::is_whitespace) && symbol != NBSP;
+
+ // Ignore characters wider that the total max width.
+ if symbol.width() as u16 > self.max_line_width
+ // Skip leading whitespace when trim is enabled.
+ || self.trim && symbol_whitespace && symbol != "\n" && current_line_width == 0
+ {
+ continue;
+ }
+
+ // Break on newline and discard it.
+ if symbol == "\n" {
+ if prev_whitespace {
+ current_line_width = width_to_last_word_end;
+ self.current_line.truncate(symbols_to_last_word_end);
+ }
+ break;
+ }
+
+ // Mark the previous symbol as word end.
+ if symbol_whitespace && !prev_whitespace {
+ symbols_to_last_word_end = self.current_line.len();
+ width_to_last_word_end = current_line_width;
+ }
+
+ self.current_line.push(StyledGrapheme { symbol, style });
+ current_line_width += symbol.width() as u16;
+
+ if current_line_width > self.max_line_width {
+ // If there was no word break in the text, wrap at the end of the line.
+ let (truncate_at, truncated_width) = if symbols_to_last_word_end != 0 {
+ (symbols_to_last_word_end, width_to_last_word_end)
+ } else {
+ (self.current_line.len() - 1, self.max_line_width)
+ };
+
+ // Push the remainder to the next line but strip leading whitespace:
+ {
+ let remainder = &self.current_line[truncate_at..];
+ if let Some(remainder_nonwhite) =
+ remainder.iter().position(|StyledGrapheme { symbol, .. }| {
+ !symbol.chars().all(&char::is_whitespace)
+ })
+ {
+ self.next_line
+ .extend_from_slice(&remainder[remainder_nonwhite..]);
+ }
+ }
+ self.current_line.truncate(truncate_at);
+ current_line_width = truncated_width;
+ break;
+ }
+
+ prev_whitespace = symbol_whitespace;
+ }
+
+ // Even if the iterator is exhausted, pass the previous remainder.
+ if symbols_exhausted && self.current_line.is_empty() {
+ None
+ } else {
+ Some((&self.current_line[..], current_line_width))
+ }
+ }
+}
+
+/// A state machine that truncates overhanging lines.
+pub struct LineTruncator<'a, 'b> {
+ symbols: &'b mut dyn Iterator<Item = StyledGrapheme<'a>>,
+ max_line_width: u16,
+ current_line: Vec<StyledGrapheme<'a>>,
+ /// Record the offet to skip render
+ horizontal_offset: u16,
+}
+
+impl<'a, 'b> LineTruncator<'a, 'b> {
+ pub fn new(
+ symbols: &'b mut dyn Iterator<Item = StyledGrapheme<'a>>,
+ max_line_width: u16,
+ ) -> LineTruncator<'a, 'b> {
+ LineTruncator {
+ symbols,
+ max_line_width,
+ horizontal_offset: 0,
+ current_line: vec![],
+ }
+ }
+
+ pub fn set_horizontal_offset(&mut self, horizontal_offset: u16) {
+ self.horizontal_offset = horizontal_offset;
+ }
+}
+
+impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> {
+ fn next_line(&mut self) -> Option<(&[StyledGrapheme<'a>], u16)> {
+ if self.max_line_width == 0 {
+ return None;
+ }
+
+ self.current_line.truncate(0);
+ let mut current_line_width = 0;
+
+ let mut skip_rest = false;
+ let mut symbols_exhausted = true;
+ let mut horizontal_offset = self.horizontal_offset as usize;
+ for StyledGrapheme { symbol, style } in &mut self.symbols {
+ symbols_exhausted = false;
+
+ // Ignore characters wider that the total max width.
+ if symbol.width() as u16 > self.max_line_width {
+ continue;
+ }
+
+ // Break on newline and discard it.
+ if symbol == "\n" {
+ break;
+ }
+
+ if current_line_width + symbol.width() as u16 > self.max_line_width {
+ // Exhaust the remainder of the line.
+ skip_rest = true;
+ break;
+ }
+
+ let symbol = if horizontal_offset == 0 {
+ symbol
+ } else {
+ let w = symbol.width();
+ if w > horizontal_offset {
+ let t = trim_offset(symbol, horizontal_offset);
+ horizontal_offset = 0;
+ t
+ } else {
+ horizontal_offset -= w;
+ ""
+ }
+ };
+ current_line_width += symbol.width() as u16;
+ self.current_line.push(StyledGrapheme { symbol, style });
+ }
+
+ if skip_rest {
+ for StyledGrapheme { symbol, .. } in &mut self.symbols {
+ if symbol == "\n" {
+ break;
+ }
+ }
+ }
+
+ if symbols_exhausted && self.current_line.is_empty() {
+ None
+ } else {
+ Some((&self.current_line[..], current_line_width))
+ }
+ }
+}
+
+/// This function will return a str slice which start at specified offset.
+/// As src is a unicode str, start offset has to be calculated with each character.
+fn trim_offset(src: &str, mut offset: usize) -> &str {
+ let mut start = 0;
+ for c in UnicodeSegmentation::graphemes(src, true) {
+ let w = c.width();
+ if w <= offset {
+ offset -= w;
+ start += c.len();
+ } else {
+ break;
+ }
+ }
+ &src[start..]
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use unicode_segmentation::UnicodeSegmentation;
+
+ enum Composer {
+ WordWrapper { trim: bool },
+ LineTruncator,
+ }
+
+ fn run_composer(which: Composer, text: &str, text_area_width: u16) -> (Vec<String>, Vec<u16>) {
+ let style = Default::default();
+ let mut styled =
+ UnicodeSegmentation::graphemes(text, true).map(|g| StyledGrapheme { symbol: g, style });
+ let mut composer: Box<dyn LineComposer> = match which {
+ Composer::WordWrapper { trim } => {
+ Box::new(WordWrapper::new(&mut styled, text_area_width, trim))
+ }
+ Composer::LineTruncator => Box::new(LineTruncator::new(&mut styled, text_area_width)),
+ };
+ let mut lines = vec![];
+ let mut widths = vec![];
+ while let Some((styled, width)) = composer.next_line() {
+ let line = styled
+ .iter()
+ .map(|StyledGrapheme { symbol, .. }| *symbol)
+ .collect::<String>();
+ assert!(width <= text_area_width);
+ lines.push(line);
+ widths.push(width);
+ }
+ (lines, widths)
+ }
+
+ #[test]
+ fn line_composer_one_line() {
+ let width = 40;
+ for i in 1..width {
+ let text = "a".repeat(i);
+ let (word_wrapper, _) =
+ run_composer(Composer::WordWrapper { trim: true }, &text, width as u16);
+ let (line_truncator, _) = run_composer(Composer::LineTruncator, &text, width as u16);
+ let expected = vec![text];
+ assert_eq!(word_wrapper, expected);
+ assert_eq!(line_truncator, expected);
+ }
+ }
+
+ #[test]
+ fn line_composer_short_lines() {
+ let width = 20;
+ let text =
+ "abcdefg\nhijklmno\npabcdefg\nhijklmn\nopabcdefghijk\nlmnopabcd\n\n\nefghijklmno";
+ let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
+ let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
+
+ let wrapped: Vec<&str> = text.split('\n').collect();
+ assert_eq!(word_wrapper, wrapped);
+ assert_eq!(line_truncator, wrapped);
+ }
+
+ #[test]
+ fn line_composer_long_word() {
+ let width = 20;
+ let text = "abcdefghijklmnopabcdefghijklmnopabcdefghijklmnopabcdefghijklmno";
+ let (word_wrapper, _) =
+ run_composer(Composer::WordWrapper { trim: true }, text, width as u16);
+ let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width as u16);
+
+ let wrapped = vec![
+ &text[..width],
+ &text[width..width * 2],
+ &text[width * 2..width * 3],
+ &text[width * 3..],
+ ];
+ assert_eq!(
+ word_wrapper, wrapped,
+ "WordWrapper should detect the line cannot be broken on word boundary and \
+ break it at line width limit."
+ );
+ assert_eq!(line_truncator, vec![&text[..width]]);
+ }
+
+ #[test]
+ fn line_composer_long_sentence() {
+ let width = 20;
+ let text =
+ "abcd efghij klmnopabcd efgh ijklmnopabcdefg hijkl mnopab c d e f g h i j k l m n o";
+ let text_multi_space =
+ "abcd efghij klmnopabcd efgh ijklmnopabcdefg hijkl mnopab c d e f g h i j k l \
+ m n o";
+ let (word_wrapper_single_space, _) =
+ run_composer(Composer::WordWrapper { trim: true }, text, width as u16);
+ let (word_wrapper_multi_space, _) = run_composer(
+ Composer::WordWrapper { trim: true },
+ text_multi_space,
+ width as u16,
+ );
+ let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width as u16);
+
+ let word_wrapped = vec![
+ "abcd efghij",
+ "klmnopabcd efgh",
+ "ijklmnopabcdefg",
+ "hijkl mnopab c d e f",
+ "g h i j k l m n o",
+ ];
+ assert_eq!(word_wrapper_single_space, word_wrapped);
+ assert_eq!(word_wrapper_multi_space, word_wrapped);
+
+ assert_eq!(line_truncator, vec![&text[..width]]);
+ }
+
+ #[test]
+ fn line_composer_zero_width() {
+ let width = 0;
+ let text = "abcd efghij klmnopabcd efgh ijklmnopabcdefg hijkl mnopab ";
+ let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
+ let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
+
+ let expected: Vec<&str> = Vec::new();
+ assert_eq!(word_wrapper, expected);
+ assert_eq!(line_truncator, expected);
+ }
+
+ #[test]
+ fn line_composer_max_line_width_of_1() {
+ let width = 1;
+ let text = "abcd efghij klmnopabcd efgh ijklmnopabcdefg hijkl mnopab ";
+ let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
+ let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
+
+ let expected: Vec<&str> = UnicodeSegmentation::graphemes(text, true)
+ .filter(|g| g.chars().any(|c| !c.is_whitespace()))
+ .collect();
+ assert_eq!(word_wrapper, expected);
+ assert_eq!(line_truncator, vec!["a"]);
+ }
+
+ #[test]
+ fn line_composer_max_line_width_of_1_double_width_characters() {
+ let width = 1;
+ let text = "コンピュータ上で文字を扱う場合、典型的には文字\naaaによる通信を行う場合にその\
+ 両端点では、";
+ let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
+ let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
+ assert_eq!(word_wrapper, vec!["", "a", "a", "a"]);
+ assert_eq!(line_truncator, vec!["", "a"]);
+ }
+
+ /// Tests WordWrapper with words some of which exceed line length and some not.
+ #[test]
+ fn line_composer_word_wrapper_mixed_length() {
+ let width = 20;
+ let text = "abcd efghij klmnopabcdefghijklmnopabcdefghijkl mnopab cdefghi j klmno";
+ let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
+ assert_eq!(
+ word_wrapper,
+ vec![
+ "abcd efghij",
+ "klmnopabcdefghijklmn",
+ "opabcdefghijkl",
+ "mnopab cdefghi j",
+ "klmno",
+ ]
+ )
+ }
+
+ #[test]
+ fn line_composer_double_width_chars() {
+ let width = 20;
+ let text = "コンピュータ上で文字を扱う場合、典型的には文字による通信を行う場合にその両端点\
+ では、";
+ let (word_wrapper, word_wrapper_width) =
+ run_composer(Composer::WordWrapper { trim: true }, &text, width);
+ let (line_truncator, _) = run_composer(Composer::LineTruncator, &text, width);
+ assert_eq!(line_truncator, vec!["コンピュータ上で文字"]);
+ let wrapped = vec![
+ "コンピュータ上で文字",
+ "を扱う場合、典型的に",
+ "は文字による通信を行",
+ "う場合にその両端点で",
+ "は、",
+ ];
+ assert_eq!(word_wrapper, wrapped);
+ assert_eq!(word_wrapper_width, vec![width, width, width, width, 4]);
+ }
+
+ #[test]
+ fn line_composer_leading_whitespace_removal() {
+ let width = 20;
+ let text = "AAAAAAAAAAAAAAAAAAAA AAA";
+ let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
+ let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
+ assert_eq!(word_wrapper, vec!["AAAAAAAAAAAAAAAAAAAA", "AAA",]);
+ assert_eq!(line_truncator, vec!["AAAAAAAAAAAAAAAAAAAA"]);
+ }
+
+ /// Tests truncation of leading whitespace.
+ #[test]
+ fn line_composer_lots_of_spaces() {
+ let width = 20;
+ let text = " ";
+ let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
+ let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
+ assert_eq!(word_wrapper, vec![""]);
+ assert_eq!(line_truncator, vec![" "]);
+ }
+
+ /// Tests an input starting with a letter, folowed by spaces - some of the behaviour is
+ /// incidental.
+ #[test]
+ fn line_composer_char_plus_lots_of_spaces() {
+ let width = 20;
+ let text = "a ";
+ let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
+ let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
+ // What's happening below is: the first line gets consumed, trailing spaces discarded,
+ // after 20 of which a word break occurs (probably shouldn't). The second line break
+ // discards all whitespace. The result should probably be vec!["a"] but it doesn't matter
+ // that much.
+ assert_eq!(word_wrapper, vec!["a", ""]);
+ assert_eq!(line_truncator, vec!["a "]);
+ }
+
+ #[test]
+ fn line_composer_word_wrapper_double_width_chars_mixed_with_spaces() {
+ let width = 20;
+ // Japanese seems not to use spaces but we should break on spaces anyway... We're using it
+ // to test double-width chars.
+ // You are more than welcome to add word boundary detection based of alterations of
+ // hiragana and katakana...
+ // This happens to also be a test case for mixed width because regular spaces are single width.
+ let text = "コンピュ ータ上で文字を扱う場合、 典型的には文 字による 通信を行 う場合にその両端点では、";
+ let (word_wrapper, word_wrapper_width) =
+ run_composer(Composer::WordWrapper { trim: true }, text, width);
+ assert_eq!(
+ word_wrapper,
+ vec![
+ "コンピュ",
+ "ータ上で文字を扱う場",
+ "合、 典型的には文",
+ "字による 通信を行",
+ "う場合にその両端点で",
+ "は、",
+ ]
+ );
+ // Odd-sized lines have a space in them.
+ assert_eq!(word_wrapper_width, vec![8, 20, 17, 17, 20, 4]);
+ }
+
+ /// Ensure words separated by nbsp are wrapped as if they were a single one.
+ #[test]
+ fn line_composer_word_wrapper_nbsp() {
+ let width = 20;
+ let text = "AAAAAAAAAAAAAAA AAAA\u{00a0}AAA";
+ let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
+ assert_eq!(word_wrapper, vec!["AAAAAAAAAAAAAAA", "AAAA\u{00a0}AAA",]);
+
+ // Ensure that if the character was a regular space, it would be wrapped differently.
+ let text_space = text.replace("\u{00a0}", " ");
+ let (word_wrapper_space, _) =
+ run_composer(Composer::WordWrapper { trim: true }, &text_space, width);
+ assert_eq!(word_wrapper_space, vec!["AAAAAAAAAAAAAAA AAAA", "AAA",]);
+ }
+
+ #[test]
+ fn line_composer_word_wrapper_preserve_indentation() {
+ let width = 20;
+ let text = "AAAAAAAAAAAAAAAAAAAA AAA";
+ let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: false }, text, width);
+ assert_eq!(word_wrapper, vec!["AAAAAAAAAAAAAAAAAAAA", " AAA",]);
+ }
+
+ #[test]
+ fn line_composer_word_wrapper_preserve_indentation_with_wrap() {
+ let width = 10;
+ let text = "AAA AAA AAAAA AA AAAAAA\n B\n C\n D";
+ let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: false }, text, width);
+ assert_eq!(
+ word_wrapper,
+ vec!["AAA AAA", "AAAAA AA", "AAAAAA", " B", " C", " D"]
+ );
+ }
+
+ #[test]
+ fn line_composer_word_wrapper_preserve_indentation_lots_of_whitespace() {
+ let width = 10;
+ let text = " 4 Indent\n must wrap!";
+ let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: false }, text, width);
+ assert_eq!(
+ word_wrapper,
+ vec![
+ " ",
+ " 4",
+ "Indent",
+ " ",
+ " must",
+ "wrap!"
+ ]
+ );
+ }
+}