aboutsummaryrefslogtreecommitdiff
path: root/helix-core/src/doc_formatter.rs
diff options
context:
space:
mode:
Diffstat (limited to 'helix-core/src/doc_formatter.rs')
-rw-r--r--helix-core/src/doc_formatter.rs384
1 files changed, 384 insertions, 0 deletions
diff --git a/helix-core/src/doc_formatter.rs b/helix-core/src/doc_formatter.rs
new file mode 100644
index 00000000..c7dc9081
--- /dev/null
+++ b/helix-core/src/doc_formatter.rs
@@ -0,0 +1,384 @@
+//! The `DocumentFormatter` forms the bridge between the raw document text
+//! and onscreen positioning. It yields the text graphemes as an iterator
+//! and traverses (part of) the document text. During that traversal it
+//! handles grapheme detection, softwrapping and annotations.
+//! It yields `FormattedGrapheme`s and their corresponding visual coordinates.
+//!
+//! As both virtual text and softwrapping can insert additional lines into the document
+//! it is generally not possible to find the start of the previous visual line.
+//! Instead the `DocumentFormatter` starts at the last "checkpoint" (usually a linebreak)
+//! called a "block" and the caller must advance it as needed.
+
+use std::borrow::Cow;
+use std::fmt::Debug;
+use std::mem::{replace, take};
+
+#[cfg(test)]
+mod test;
+
+use unicode_segmentation::{Graphemes, UnicodeSegmentation};
+
+use crate::graphemes::{Grapheme, GraphemeStr};
+use crate::syntax::Highlight;
+use crate::text_annotations::TextAnnotations;
+use crate::{Position, RopeGraphemes, RopeSlice};
+
+/// Where a `FormattedGrapheme` originated: the document text itself
+/// or virtual text inserted by the formatter.
+///
+/// TODO make Highlight a u32 to reduce the size of this enum to a single word.
+#[derive(Debug, Clone, Copy)]
+pub enum GraphemeSource {
+ /// The grapheme was read from the document text.
+ Document {
+ /// Number of document chars (codepoints) covered by this grapheme.
+ /// The formatter advances its char position by this amount.
+ codepoints: u32,
+ },
+ /// Inline virtual text can not be highlighted with a `Highlight` iterator
+ /// because it's not part of the document. Instead the `Highlight`
+ /// is emitted directly by the document formatter.
+ VirtualText {
+ /// Highlight to apply to this grapheme, if any.
+ highlight: Option<Highlight>,
+ },
+}
+
+/// A single grapheme produced by the `DocumentFormatter`,
+/// paired with the source it was produced from.
+#[derive(Debug, Clone)]
+pub struct FormattedGrapheme<'a> {
+ /// The grapheme itself.
+ pub grapheme: Grapheme<'a>,
+ /// Whether the grapheme stems from the document or from virtual text.
+ pub source: GraphemeSource,
+}
+
+impl<'a> FormattedGrapheme<'a> {
+ /// Builds a formatted grapheme from the raw grapheme string `g`.
+ ///
+ /// `visual_x` and `tab_width` are forwarded to `Grapheme::new`,
+ /// `source` records where the grapheme came from.
+ pub fn new(
+ g: GraphemeStr<'a>,
+ visual_x: usize,
+ tab_width: u16,
+ source: GraphemeSource,
+ ) -> FormattedGrapheme<'a> {
+ let grapheme = Grapheme::new(g, visual_x, tab_width);
+ Self { grapheme, source }
+ }
+
+ /// Returns whether this grapheme is virtual inline text
+ pub fn is_virtual(&self) -> bool {
+ match self.source {
+ GraphemeSource::VirtualText { .. } => true,
+ GraphemeSource::Document { .. } => false,
+ }
+ }
+
+ /// A single-space grapheme attributed to the document with zero codepoints.
+ pub fn placeholder() -> Self {
+ let source = GraphemeSource::Document { codepoints: 0 };
+ let grapheme = Grapheme::Other { g: " ".into() };
+ Self { grapheme, source }
+ }
+
+ /// Number of document chars this grapheme covers (always `0` for virtual text).
+ pub fn doc_chars(&self) -> usize {
+ if let GraphemeSource::Document { codepoints } = self.source {
+ codepoints as usize
+ } else {
+ 0
+ }
+ }
+
+ /// Whether the underlying grapheme is whitespace.
+ pub fn is_whitespace(&self) -> bool {
+ let inner = &self.grapheme;
+ inner.is_whitespace()
+ }
+
+ /// Visual width of the underlying grapheme.
+ pub fn width(&self) -> usize {
+ let inner = &self.grapheme;
+ inner.width()
+ }
+
+ /// Whether the underlying grapheme terminates a word.
+ pub fn is_word_boundary(&self) -> bool {
+ let inner = &self.grapheme;
+ inner.is_word_boundary()
+ }
+}
+
+/// Layout settings consumed by the `DocumentFormatter`.
+#[derive(Debug, Clone)]
+pub struct TextFormat {
+ /// Whether words that do not fit into the viewport are wrapped onto the next visual line.
+ pub soft_wrap: bool,
+ /// Tab width that is passed on when graphemes are constructed or repositioned.
+ pub tab_width: u16,
+ /// Words wider than this are split at the end of the line
+ /// instead of being moved to the next visual line as a whole.
+ pub max_wrap: u16,
+ /// Indentation is carried over to wrapped lines only if it does not exceed this value,
+ /// otherwise wrapped lines start at column 0.
+ pub max_indent_retain: u16,
+ /// Text whose graphemes are inserted (as virtual text) in front of a wrapped word.
+ pub wrap_indicator: Box<str>,
+ /// Highlight attached to the graphemes of `wrap_indicator`.
+ pub wrap_indicator_highlight: Option<Highlight>,
+ /// Width (in columns) at which softwrapping becomes necessary.
+ pub viewport_width: u16,
+}
+
+// The default implementation is basically only used for testing or when softwrap is always disabled
+impl Default for TextFormat {
+ /// Softwrap disabled, 4-column tabs, a single-space wrap indicator
+ /// and a 17-column viewport.
+ fn default() -> Self {
+ let wrap_indicator: Box<str> = " ".into();
+ Self {
+ soft_wrap: false,
+ tab_width: 4,
+ max_wrap: 3,
+ max_indent_retain: 4,
+ wrap_indicator,
+ wrap_indicator_highlight: None,
+ viewport_width: 17,
+ }
+ }
+}
+
+/// Iterator state for formatting a document: yields `FormattedGrapheme`s
+/// together with their visual position.
+#[derive(Debug)]
+pub struct DocumentFormatter<'t> {
+ /// Layout settings (softwrap, tab width, viewport width, ...)
+ text_fmt: &'t TextFormat,
+ /// Source of inline annotations, overlays and virtual lines
+ annotations: &'t TextAnnotations,
+
+ /// The visual position at the end of the last yielded word boundary
+ visual_pos: Position,
+ /// Grapheme iterator over the document text, starting at the block start
+ graphemes: RopeGraphemes<'t>,
+ /// The character pos of the `graphemes` iter used for inserting annotations
+ char_pos: usize,
+ /// The line pos of the `graphemes` iter used for inserting annotations
+ line_pos: usize,
+ /// Set once the EOF grapheme has been produced after `graphemes` ran out,
+ /// afterwards no further graphemes are produced
+ exhausted: bool,
+
+ /// Line breaks to be reserved for virtual text
+ /// at the next line break
+ virtual_lines: usize,
+ /// Grapheme iterator over the text of the inline annotation that is currently
+ /// being yielded, together with the highlight of that annotation
+ inline_anntoation_graphemes: Option<(Graphemes<'t>, Option<Highlight>)>,
+
+ // softwrap specific
+ /// The indentation of the current line
+ /// Is set to `None` if the indentation level is not yet known
+ /// because no non-whitespace graphemes have been encountered yet
+ indent_level: Option<usize>,
+ /// In case a long word needs to be split a single grapheme might need to be wrapped
+ /// while the rest of the word stays on the same line.
+ /// The `usize` is the number of virtual lines that were added while that grapheme
+ /// was fetched, they are added again once the grapheme is consumed
+ peeked_grapheme: Option<(FormattedGrapheme<'t>, usize)>,
+ /// A first-in first-out (fifo) buffer for the Graphemes of any given word
+ word_buf: Vec<FormattedGrapheme<'t>>,
+ /// The index of the next grapheme that will be yielded from the `word_buf`
+ word_i: usize,
+}
+
+impl<'t> DocumentFormatter<'t> {
+ /// Creates a new formatter positioned at the start of the block that contains `char_idx`
+ /// and returns it together with the char index of that block start.
+ ///
+ /// A block is a chunk which always ends with a linebreak.
+ /// Currently the block start is simply the first char of the line containing `char_idx`
+ /// (`char_idx` is clamped to the length of `text`). Dividing very long lines into
+ /// multiple blocks is not implemented yet, see the TODO below.
+ ///
+ /// The position of `annotations` is reset to the block start as a side effect.
+ pub fn new_at_prev_checkpoint(
+ text: RopeSlice<'t>,
+ text_fmt: &'t TextFormat,
+ annotations: &'t TextAnnotations,
+ char_idx: usize,
+ ) -> (Self, usize) {
+ // TODO divide long lines into blocks to avoid bad performance for long lines
+ let clamped_idx = char_idx.min(text.len_chars());
+ let start_line = text.char_to_line(clamped_idx);
+ let start_char = text.line_to_char(start_line);
+ annotations.reset_pos(start_char);
+
+ let formatter = Self {
+ text_fmt,
+ annotations,
+ visual_pos: Position { row: 0, col: 0 },
+ graphemes: RopeGraphemes::new(text.slice(start_char..)),
+ char_pos: start_char,
+ line_pos: start_line,
+ exhausted: false,
+ virtual_lines: 0,
+ inline_anntoation_graphemes: None,
+ indent_level: None,
+ peeked_grapheme: None,
+ word_buf: Vec::with_capacity(64),
+ word_i: 0,
+ };
+ (formatter, start_char)
+ }
+
+ /// Returns the next grapheme of inline virtual text at the current char position
+ /// together with the highlight of its annotation, or `None` once no further
+ /// inline annotation is available at this position.
+ fn next_inline_annotation_grapheme(&mut self) -> Option<(&'t str, Option<Highlight>)> {
+ loop {
+ // Drain the annotation that is currently being iterated first.
+ if let Some((pending, highlight)) = &mut self.inline_anntoation_graphemes {
+ if let Some(text) = pending.next() {
+ return Some((text, *highlight));
+ }
+ }
+
+ // Nothing pending (anymore): fetch the next annotation at this position
+ // or stop if there is none.
+ let (annotation, highlight) =
+ self.annotations.next_inline_annotation_at(self.char_pos)?;
+ self.inline_anntoation_graphemes = Some((
+ UnicodeSegmentation::graphemes(&*annotation.text, true),
+ highlight,
+ ));
+ }
+ }
+
+ /// Produces the next grapheme, positioned at visual column `col`.
+ ///
+ /// Sources are tried in this order: pending inline annotation text,
+ /// the document text (with overlays applied) and finally - exactly once -
+ /// a trailing EOF grapheme. Returns `None` after that.
+ fn advance_grapheme(&mut self, col: usize) -> Option<FormattedGrapheme<'t>> {
+ // Inline virtual text at the current position comes before the document grapheme.
+ if let Some((text, highlight)) = self.next_inline_annotation_grapheme() {
+ let source = GraphemeSource::VirtualText { highlight };
+ return Some(FormattedGrapheme::new(
+ text.into(),
+ col,
+ self.text_fmt.tab_width,
+ source,
+ ));
+ }
+
+ let doc_grapheme = match self.graphemes.next() {
+ Some(doc_grapheme) => doc_grapheme,
+ None if self.exhausted => return None,
+ None => {
+ self.exhausted = true;
+ // EOF grapheme is required for rendering
+ // and correct position computations
+ return Some(FormattedGrapheme {
+ grapheme: Grapheme::Other { g: " ".into() },
+ source: GraphemeSource::Document { codepoints: 0 },
+ });
+ }
+ };
+
+ // Annotation lookups use the char position *before* it is advanced past this grapheme.
+ self.virtual_lines += self.annotations.annotation_lines_at(self.char_pos);
+ let codepoints = doc_grapheme.len_chars() as u32;
+
+ // An overlay replaces the displayed grapheme but not the amount of document chars consumed.
+ let text = if let Some((overlay, _)) = self.annotations.overlay_at(self.char_pos) {
+ overlay.grapheme.as_str().into()
+ } else {
+ Cow::from(doc_grapheme).into()
+ };
+ self.char_pos += codepoints as usize;
+
+ Some(FormattedGrapheme::new(
+ text,
+ col,
+ self.text_fmt.tab_width,
+ GraphemeSource::Document { codepoints },
+ ))
+ }
+
+ /// Move a word to the next visual line
+ ///
+ /// Resets the visual column to the retained indentation, advances the visual row past
+ /// the line break (plus `virtual_lines_before_word` reserved virtual lines), prepends the
+ /// wrap indicator to `word_buf` and repositions the graphemes that were already buffered.
+ ///
+ /// Returns the total width of `word_buf` (wrap indicator included) at its new position.
+ fn wrap_word(&mut self, virtual_lines_before_word: usize) -> usize {
+ // softwrap this word to the next line
+ // the indentation is only retained if it does not exceed `max_indent_retain`
+ let indent_carry_over = if let Some(indent) = self.indent_level {
+ if indent as u16 <= self.text_fmt.max_indent_retain {
+ indent as u16
+ } else {
+ 0
+ }
+ } else {
+ // ensure the indent stays 0
+ self.indent_level = Some(0);
+ 0
+ };
+
+ self.visual_pos.col = indent_carry_over as usize;
+ // the virtual lines that were reserved before this word started
+ // are emitted at the line break introduced by the wrap
+ self.virtual_lines -= virtual_lines_before_word;
+ self.visual_pos.row += 1 + virtual_lines_before_word;
+ // `i` counts the graphemes of the wrap indicator,
+ // `word_width` accumulates the width of everything placed on the new line so far
+ let mut i = 0;
+ let mut word_width = 0;
+ let wrap_indicator = UnicodeSegmentation::graphemes(&*self.text_fmt.wrap_indicator, true)
+ .map(|g| {
+ i += 1;
+ let grapheme = FormattedGrapheme::new(
+ g.into(),
+ self.visual_pos.col + word_width,
+ self.text_fmt.tab_width,
+ GraphemeSource::VirtualText {
+ highlight: self.text_fmt.wrap_indicator_highlight,
+ },
+ );
+ word_width += grapheme.width();
+ grapheme
+ });
+ // insert the wrap indicator at the front of the word, the iterator above
+ // (and therefore the updates of `i` and `word_width`) is consumed by this statement
+ self.word_buf.splice(0..0, wrap_indicator);
+
+ // reposition the graphemes of the word itself behind the wrap indicator
+ for grapheme in &mut self.word_buf[i..] {
+ let visual_x = self.visual_pos.col + word_width;
+ grapheme
+ .grapheme
+ .change_position(visual_x, self.text_fmt.tab_width);
+ word_width += grapheme.width();
+ }
+ word_width
+ }
+
+ /// Refills `word_buf` with the graphemes of the next word and softwraps it if necessary.
+ ///
+ /// Graphemes are accumulated until a word boundary was pushed, no further grapheme
+ /// is available, or the word must be split at the end of the line because it is
+ /// wider than `max_wrap`.
+ fn advance_to_next_word(&mut self) {
+ self.word_buf.clear();
+ let mut word_width = 0;
+ // virtual lines reserved before the word (respectively the latest grapheme) was read,
+ // required to attribute them to the correct line break when wrapping
+ let virtual_lines_before_word = self.virtual_lines;
+ let mut virtual_lines_before_grapheme = self.virtual_lines;
+
+ loop {
+ // softwrap word if necessary
+ if word_width + self.visual_pos.col >= self.text_fmt.viewport_width as usize {
+ // wrapping this word would move too much text to the next line
+ // split the word at the line end instead
+ if word_width > self.text_fmt.max_wrap as usize {
+ // Usually we stop accumulating graphemes as soon as softwrapping becomes necessary.
+ // However if the last grapheme is multiple columns wide it might extend beyond the EOL.
+ // The condition below ensures that this grapheme is not cutoff and instead wrapped to the next line
+ if word_width + self.visual_pos.col > self.text_fmt.viewport_width as usize {
+ // stash the overhanging grapheme (and the virtual lines it added)
+ // so that it becomes the first grapheme of the next word
+ self.peeked_grapheme = self.word_buf.pop().map(|grapheme| {
+ (grapheme, self.virtual_lines - virtual_lines_before_grapheme)
+ });
+ self.virtual_lines = virtual_lines_before_grapheme;
+ }
+ return;
+ }
+
+ // the word is short enough: move everything buffered so far to the next line
+ word_width = self.wrap_word(virtual_lines_before_word);
+ }
+
+ virtual_lines_before_grapheme = self.virtual_lines;
+
+ // a grapheme stashed by a previous split takes precedence over reading a new one
+ let grapheme = if let Some((grapheme, virtual_lines)) = self.peeked_grapheme.take() {
+ self.virtual_lines += virtual_lines;
+ grapheme
+ } else if let Some(grapheme) = self.advance_grapheme(self.visual_pos.col + word_width) {
+ grapheme
+ } else {
+ return;
+ };
+
+ // Track indentation
+ if !grapheme.is_whitespace() && self.indent_level.is_none() {
+ self.indent_level = Some(self.visual_pos.col);
+ } else if grapheme.grapheme == Grapheme::Newline {
+ self.indent_level = None;
+ }
+
+ let is_word_boundary = grapheme.is_word_boundary();
+ word_width += grapheme.width();
+ self.word_buf.push(grapheme);
+
+ // the word boundary itself is part of the word and ends it
+ if is_word_boundary {
+ return;
+ }
+ }
+ }
+
+ /// returns the document line pos of the **next** grapheme that will be yielded
+ ///
+ /// Starts at the line of the block start and is incremented
+ /// whenever a newline grapheme is yielded.
+ pub fn line_pos(&self) -> usize {
+ self.line_pos
+ }
+
+ /// returns the visual pos of the **next** grapheme that will be yielded
+ ///
+ /// NOTE(review): with softwrap enabled the next word is only wrapped when it is fetched
+ /// by the following call to `next`, so this appears to be the position before any such
+ /// wrap is applied - confirm against callers.
+ pub fn visual_pos(&self) -> Position {
+ self.visual_pos
+ }
+}
+
+impl<'t> Iterator for DocumentFormatter<'t> {
+ type Item = (FormattedGrapheme<'t>, Position);
+
+ /// Yields the next grapheme together with the visual position it is placed at
+ /// and advances the visual position (and the line position at newlines) past it.
+ fn next(&mut self) -> Option<Self::Item> {
+ let grapheme = if !self.text_fmt.soft_wrap {
+ // without softwrap graphemes are passed through one by one
+ self.advance_grapheme(self.visual_pos.col)?
+ } else {
+ // refill the word buffer once all buffered graphemes have been handed out
+ if self.word_i >= self.word_buf.len() {
+ self.advance_to_next_word();
+ self.word_i = 0;
+ }
+ // an empty buffer after refilling means there is nothing left to yield
+ let slot = self.word_buf.get_mut(self.word_i)?;
+ // move the grapheme out of the buffer and leave a placeholder behind
+ let taken = replace(slot, FormattedGrapheme::placeholder());
+ self.word_i += 1;
+ taken
+ };
+
+ let pos = self.visual_pos;
+ if grapheme.grapheme != Grapheme::Newline {
+ self.visual_pos.col += grapheme.width();
+ } else {
+ // a line break also emits the virtual lines that were reserved for it
+ let reserved_lines = take(&mut self.virtual_lines);
+ self.visual_pos.row += 1;
+ self.visual_pos.row += reserved_lines;
+ self.visual_pos.col = 0;
+ self.line_pos += 1;
+ }
+ Some((grapheme, pos))
+ }
+}