From 4dcf1fe66ba30a78edc054780d9b65c2f826530f Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Tue, 31 Jan 2023 18:03:19 +0100 Subject: rework positioning/rendering and enable softwrap/virtual text (#5420) * rework positioning/rendering, enables softwrap/virtual text This commit is a large rework of the core text positioning and rendering code in helix to remove the assumption that on-screen columns/lines correspond to text columns/lines. A generic `DocFormatter` is introduced that positions graphemes on and is used both for rendering and for movements/scrolling. Both virtual text support (inline, grapheme overlay and multi-line) and a capable softwrap implementation is included. fix picker highlight cleanup doc formatter, use word bondaries for wrapping make visual vertical movement a seperate commnad estimate line gutter width to improve performance cache cursor position cleanup and optimize doc formatter cleanup documentation fix typos Co-authored-by: Daniel Hines update documentation fix panic in last_visual_line funciton improve soft-wrap documentation add extend_visual_line_up/down commands fix non-visual vertical movement streamline virtual text highlighting, add softwrap indicator fix cursor position if softwrap is disabled improve documentation of text_annotations module avoid crashes if view anchor is out of bounds fix: consider horizontal offset when traslation char_idx -> vpos improve default configuration fix: mixed up horizontal and vertical offset reset view position after config reload apply suggestions from review disabled softwrap for very small screens to avoid endless spin fix wrap_indicator setting fix bar cursor disappearring on the EOF character add keybinding for linewise vertical movement fix: inconsistent gutter highlights improve virtual text API make scope idx lookup more ergonomic allow overlapping overlays correctly track char_pos for virtual text adjust configuration deprecate old position fucntions fix infinite loop in highlight lookup fix gutter style fix formatting document max-line-width interaction with softwrap change wrap-indicator example to use empty string fix: rare panic when view is in invalid state (bis) * Apply suggestions from code review Co-authored-by: Michael Davis * improve documentation for positoning functions * simplify tests * fix documentation of Grapheme::width * Apply suggestions from code review Co-authored-by: Michael Davis * add explicit drop invocation * Add explicit MoveFn type alias * add docuntation to Editor::cursor_cache * fix a few typos * explain use of allow(deprecated) * make gj and gk extend in select mode * remove unneded debug and TODO * mark tab_width_at #[inline] * add fast-path to move_vertically_visual in case softwrap is disabled * rename first_line to first_visual_line * simplify duplicate if/else --------- Co-authored-by: Michael Davis --- helix-core/src/doc_formatter.rs | 384 +++++++++++++++++++++++++++++++ helix-core/src/doc_formatter/test.rs | 222 ++++++++++++++++++ helix-core/src/graphemes.rs | 183 ++++++++++++++- helix-core/src/lib.rs | 8 +- helix-core/src/movement.rs | 202 +++++++++++++++-- helix-core/src/position.rs | 428 ++++++++++++++++++++++++++++++++++- helix-core/src/selection.rs | 32 +-- helix-core/src/text_annotations.rs | 271 ++++++++++++++++++++++ 8 files changed, 1690 insertions(+), 40 deletions(-) create mode 100644 helix-core/src/doc_formatter.rs create mode 100644 helix-core/src/doc_formatter/test.rs create mode 100644 helix-core/src/text_annotations.rs (limited to 'helix-core') diff --git a/helix-core/src/doc_formatter.rs b/helix-core/src/doc_formatter.rs new file mode 100644 index 00000000..c7dc9081 --- /dev/null +++ b/helix-core/src/doc_formatter.rs @@ -0,0 +1,384 @@ +//! The `DocumentFormatter` forms the bridge between the raw document text +//! and onscreen positioning. It yields the text graphemes as an iterator +//! and traverses (part) of the document text. During that traversal it +//! handles grapheme detection, softwrapping and annotations. +//! It yields `FormattedGrapheme`s and their corresponding visual coordinates. +//! +//! As both virtual text and softwrapping can insert additional lines into the document +//! it is generally not possible to find the start of the previous visual line. +//! Instead the `DocumentFormatter` starts at the last "checkpoint" (usually a linebreak) +//! called a "block" and the caller must advance it as needed. + +use std::borrow::Cow; +use std::fmt::Debug; +use std::mem::{replace, take}; + +#[cfg(test)] +mod test; + +use unicode_segmentation::{Graphemes, UnicodeSegmentation}; + +use crate::graphemes::{Grapheme, GraphemeStr}; +use crate::syntax::Highlight; +use crate::text_annotations::TextAnnotations; +use crate::{Position, RopeGraphemes, RopeSlice}; + +/// TODO make Highlight a u32 to reduce the size of this enum to a single word. +#[derive(Debug, Clone, Copy)] +pub enum GraphemeSource { + Document { + codepoints: u32, + }, + /// Inline virtual text can not be highlighted with a `Highlight` iterator + /// because it's not part of the document. Instead the `Highlight` + /// is emitted right by the document formatter + VirtualText { + highlight: Option, + }, +} + +#[derive(Debug, Clone)] +pub struct FormattedGrapheme<'a> { + pub grapheme: Grapheme<'a>, + pub source: GraphemeSource, +} + +impl<'a> FormattedGrapheme<'a> { + pub fn new( + g: GraphemeStr<'a>, + visual_x: usize, + tab_width: u16, + source: GraphemeSource, + ) -> FormattedGrapheme<'a> { + FormattedGrapheme { + grapheme: Grapheme::new(g, visual_x, tab_width), + source, + } + } + /// Returns whether this grapheme is virtual inline text + pub fn is_virtual(&self) -> bool { + matches!(self.source, GraphemeSource::VirtualText { .. }) + } + + pub fn placeholder() -> Self { + FormattedGrapheme { + grapheme: Grapheme::Other { g: " ".into() }, + source: GraphemeSource::Document { codepoints: 0 }, + } + } + + pub fn doc_chars(&self) -> usize { + match self.source { + GraphemeSource::Document { codepoints } => codepoints as usize, + GraphemeSource::VirtualText { .. } => 0, + } + } + + pub fn is_whitespace(&self) -> bool { + self.grapheme.is_whitespace() + } + + pub fn width(&self) -> usize { + self.grapheme.width() + } + + pub fn is_word_boundary(&self) -> bool { + self.grapheme.is_word_boundary() + } +} + +#[derive(Debug, Clone)] +pub struct TextFormat { + pub soft_wrap: bool, + pub tab_width: u16, + pub max_wrap: u16, + pub max_indent_retain: u16, + pub wrap_indicator: Box, + pub wrap_indicator_highlight: Option, + pub viewport_width: u16, +} + +// test implementation is basically only used for testing or when softwrap is always disabled +impl Default for TextFormat { + fn default() -> Self { + TextFormat { + soft_wrap: false, + tab_width: 4, + max_wrap: 3, + max_indent_retain: 4, + wrap_indicator: Box::from(" "), + viewport_width: 17, + wrap_indicator_highlight: None, + } + } +} + +#[derive(Debug)] +pub struct DocumentFormatter<'t> { + text_fmt: &'t TextFormat, + annotations: &'t TextAnnotations, + + /// The visual position at the end of the last yielded word boundary + visual_pos: Position, + graphemes: RopeGraphemes<'t>, + /// The character pos of the `graphemes` iter used for inserting annotations + char_pos: usize, + /// The line pos of the `graphemes` iter used for inserting annotations + line_pos: usize, + exhausted: bool, + + /// Line breaks to be reserved for virtual text + /// at the next line break + virtual_lines: usize, + inline_anntoation_graphemes: Option<(Graphemes<'t>, Option)>, + + // softwrap specific + /// The indentation of the current line + /// Is set to `None` if the indentation level is not yet known + /// because no non-whitespace graphemes have been encountered yet + indent_level: Option, + /// In case a long word needs to be split a single grapheme might need to be wrapped + /// while the rest of the word stays on the same line + peeked_grapheme: Option<(FormattedGrapheme<'t>, usize)>, + /// A first-in first-out (fifo) buffer for the Graphemes of any given word + word_buf: Vec>, + /// The index of the next grapheme that will be yielded from the `word_buf` + word_i: usize, +} + +impl<'t> DocumentFormatter<'t> { + /// Creates a new formatter at the last block before `char_idx`. + /// A block is a chunk which always ends with a linebreak. + /// This is usually just a normal line break. + /// However very long lines are always wrapped at constant intervals that can be cheaply calculated + /// to avoid pathological behaviour. + pub fn new_at_prev_checkpoint( + text: RopeSlice<'t>, + text_fmt: &'t TextFormat, + annotations: &'t TextAnnotations, + char_idx: usize, + ) -> (Self, usize) { + // TODO divide long lines into blocks to avoid bad performance for long lines + let block_line_idx = text.char_to_line(char_idx.min(text.len_chars())); + let block_char_idx = text.line_to_char(block_line_idx); + annotations.reset_pos(block_char_idx); + ( + DocumentFormatter { + text_fmt, + annotations, + visual_pos: Position { row: 0, col: 0 }, + graphemes: RopeGraphemes::new(text.slice(block_char_idx..)), + char_pos: block_char_idx, + exhausted: false, + virtual_lines: 0, + indent_level: None, + peeked_grapheme: None, + word_buf: Vec::with_capacity(64), + word_i: 0, + line_pos: block_line_idx, + inline_anntoation_graphemes: None, + }, + block_char_idx, + ) + } + + fn next_inline_annotation_grapheme(&mut self) -> Option<(&'t str, Option)> { + loop { + if let Some(&mut (ref mut annotation, highlight)) = + self.inline_anntoation_graphemes.as_mut() + { + if let Some(grapheme) = annotation.next() { + return Some((grapheme, highlight)); + } + } + + if let Some((annotation, highlight)) = + self.annotations.next_inline_annotation_at(self.char_pos) + { + self.inline_anntoation_graphemes = Some(( + UnicodeSegmentation::graphemes(&*annotation.text, true), + highlight, + )) + } else { + return None; + } + } + } + + fn advance_grapheme(&mut self, col: usize) -> Option> { + let (grapheme, source) = + if let Some((grapheme, highlight)) = self.next_inline_annotation_grapheme() { + (grapheme.into(), GraphemeSource::VirtualText { highlight }) + } else if let Some(grapheme) = self.graphemes.next() { + self.virtual_lines += self.annotations.annotation_lines_at(self.char_pos); + let codepoints = grapheme.len_chars() as u32; + + let overlay = self.annotations.overlay_at(self.char_pos); + let grapheme = match overlay { + Some((overlay, _)) => overlay.grapheme.as_str().into(), + None => Cow::from(grapheme).into(), + }; + + self.char_pos += codepoints as usize; + (grapheme, GraphemeSource::Document { codepoints }) + } else { + if self.exhausted { + return None; + } + self.exhausted = true; + // EOF grapheme is required for rendering + // and correct position computations + return Some(FormattedGrapheme { + grapheme: Grapheme::Other { g: " ".into() }, + source: GraphemeSource::Document { codepoints: 0 }, + }); + }; + + let grapheme = FormattedGrapheme::new(grapheme, col, self.text_fmt.tab_width, source); + + Some(grapheme) + } + + /// Move a word to the next visual line + fn wrap_word(&mut self, virtual_lines_before_word: usize) -> usize { + // softwrap this word to the next line + let indent_carry_over = if let Some(indent) = self.indent_level { + if indent as u16 <= self.text_fmt.max_indent_retain { + indent as u16 + } else { + 0 + } + } else { + // ensure the indent stays 0 + self.indent_level = Some(0); + 0 + }; + + self.visual_pos.col = indent_carry_over as usize; + self.virtual_lines -= virtual_lines_before_word; + self.visual_pos.row += 1 + virtual_lines_before_word; + let mut i = 0; + let mut word_width = 0; + let wrap_indicator = UnicodeSegmentation::graphemes(&*self.text_fmt.wrap_indicator, true) + .map(|g| { + i += 1; + let grapheme = FormattedGrapheme::new( + g.into(), + self.visual_pos.col + word_width, + self.text_fmt.tab_width, + GraphemeSource::VirtualText { + highlight: self.text_fmt.wrap_indicator_highlight, + }, + ); + word_width += grapheme.width(); + grapheme + }); + self.word_buf.splice(0..0, wrap_indicator); + + for grapheme in &mut self.word_buf[i..] { + let visual_x = self.visual_pos.col + word_width; + grapheme + .grapheme + .change_position(visual_x, self.text_fmt.tab_width); + word_width += grapheme.width(); + } + word_width + } + + fn advance_to_next_word(&mut self) { + self.word_buf.clear(); + let mut word_width = 0; + let virtual_lines_before_word = self.virtual_lines; + let mut virtual_lines_before_grapheme = self.virtual_lines; + + loop { + // softwrap word if necessary + if word_width + self.visual_pos.col >= self.text_fmt.viewport_width as usize { + // wrapping this word would move too much text to the next line + // split the word at the line end instead + if word_width > self.text_fmt.max_wrap as usize { + // Usually we stop accomulating graphemes as soon as softwrapping becomes necessary. + // However if the last grapheme is multiple columns wide it might extend beyond the EOL. + // The condition below ensures that this grapheme is not cutoff and instead wrapped to the next line + if word_width + self.visual_pos.col > self.text_fmt.viewport_width as usize { + self.peeked_grapheme = self.word_buf.pop().map(|grapheme| { + (grapheme, self.virtual_lines - virtual_lines_before_grapheme) + }); + self.virtual_lines = virtual_lines_before_grapheme; + } + return; + } + + word_width = self.wrap_word(virtual_lines_before_word); + } + + virtual_lines_before_grapheme = self.virtual_lines; + + let grapheme = if let Some((grapheme, virtual_lines)) = self.peeked_grapheme.take() { + self.virtual_lines += virtual_lines; + grapheme + } else if let Some(grapheme) = self.advance_grapheme(self.visual_pos.col + word_width) { + grapheme + } else { + return; + }; + + // Track indentation + if !grapheme.is_whitespace() && self.indent_level.is_none() { + self.indent_level = Some(self.visual_pos.col); + } else if grapheme.grapheme == Grapheme::Newline { + self.indent_level = None; + } + + let is_word_boundary = grapheme.is_word_boundary(); + word_width += grapheme.width(); + self.word_buf.push(grapheme); + + if is_word_boundary { + return; + } + } + } + + /// returns the document line pos of the **next** grapheme that will be yielded + pub fn line_pos(&self) -> usize { + self.line_pos + } + + /// returns the visual pos of the **next** grapheme that will be yielded + pub fn visual_pos(&self) -> Position { + self.visual_pos + } +} + +impl<'t> Iterator for DocumentFormatter<'t> { + type Item = (FormattedGrapheme<'t>, Position); + + fn next(&mut self) -> Option { + let grapheme = if self.text_fmt.soft_wrap { + if self.word_i >= self.word_buf.len() { + self.advance_to_next_word(); + self.word_i = 0; + } + let grapheme = replace( + self.word_buf.get_mut(self.word_i)?, + FormattedGrapheme::placeholder(), + ); + self.word_i += 1; + grapheme + } else { + self.advance_grapheme(self.visual_pos.col)? + }; + + let pos = self.visual_pos; + if grapheme.grapheme == Grapheme::Newline { + self.visual_pos.row += 1; + self.visual_pos.row += take(&mut self.virtual_lines); + self.visual_pos.col = 0; + self.line_pos += 1; + } else { + self.visual_pos.col += grapheme.width(); + } + Some((grapheme, pos)) + } +} diff --git a/helix-core/src/doc_formatter/test.rs b/helix-core/src/doc_formatter/test.rs new file mode 100644 index 00000000..e68b31fd --- /dev/null +++ b/helix-core/src/doc_formatter/test.rs @@ -0,0 +1,222 @@ +use std::rc::Rc; + +use crate::doc_formatter::{DocumentFormatter, TextFormat}; +use crate::text_annotations::{InlineAnnotation, Overlay, TextAnnotations}; + +impl TextFormat { + fn new_test(softwrap: bool) -> Self { + TextFormat { + soft_wrap: softwrap, + tab_width: 2, + max_wrap: 3, + max_indent_retain: 4, + wrap_indicator: ".".into(), + wrap_indicator_highlight: None, + // use a prime number to allow lining up too often with repeat + viewport_width: 17, + } + } +} + +impl<'t> DocumentFormatter<'t> { + fn collect_to_str(&mut self) -> String { + use std::fmt::Write; + let mut res = String::new(); + let viewport_width = self.text_fmt.viewport_width; + let mut line = 0; + + for (grapheme, pos) in self { + if pos.row != line { + line += 1; + assert_eq!(pos.row, line); + write!(res, "\n{}", ".".repeat(pos.col)).unwrap(); + assert!( + pos.col <= viewport_width as usize, + "softwrapped failed {}<={viewport_width}", + pos.col + ); + } + write!(res, "{}", grapheme.grapheme).unwrap(); + } + + res + } +} + +fn softwrap_text(text: &str) -> String { + DocumentFormatter::new_at_prev_checkpoint( + text.into(), + &TextFormat::new_test(true), + &TextAnnotations::default(), + 0, + ) + .0 + .collect_to_str() +} + +#[test] +fn basic_softwrap() { + assert_eq!( + softwrap_text(&"foo ".repeat(10)), + "foo foo foo foo \n.foo foo foo foo \n.foo foo " + ); + assert_eq!( + softwrap_text(&"fooo ".repeat(10)), + "fooo fooo fooo \n.fooo fooo fooo \n.fooo fooo fooo \n.fooo " + ); + + // check that we don't wrap unnecessarily + assert_eq!(softwrap_text("\t\txxxx1xxxx2xx\n"), " xxxx1xxxx2xx \n "); +} + +#[test] +fn softwrap_indentation() { + assert_eq!( + softwrap_text("\t\tfoo1 foo2 foo3 foo4 foo5 foo6\n"), + " foo1 foo2 \n.....foo3 foo4 \n.....foo5 foo6 \n " + ); + assert_eq!( + softwrap_text("\t\t\tfoo1 foo2 foo3 foo4 foo5 foo6\n"), + " foo1 foo2 \n.foo3 foo4 foo5 \n.foo6 \n " + ); +} + +#[test] +fn long_word_softwrap() { + assert_eq!( + softwrap_text("\t\txxxx1xxxx2xxxx3xxxx4xxxx5xxxx6xxxx7xxxx8xxxx9xxx\n"), + " xxxx1xxxx2xxx\n.....x3xxxx4xxxx5\n.....xxxx6xxxx7xx\n.....xx8xxxx9xxx \n " + ); + assert_eq!( + softwrap_text("xxxxxxxx1xxxx2xxx\n"), + "xxxxxxxx1xxxx2xxx\n. \n " + ); + assert_eq!( + softwrap_text("\t\txxxx1xxxx 2xxxx3xxxx4xxxx5xxxx6xxxx7xxxx8xxxx9xxx\n"), + " xxxx1xxxx \n.....2xxxx3xxxx4x\n.....xxx5xxxx6xxx\n.....x7xxxx8xxxx9\n.....xxx \n " + ); + assert_eq!( + softwrap_text("\t\txxxx1xxx 2xxxx3xxxx4xxxx5xxxx6xxxx7xxxx8xxxx9xxx\n"), + " xxxx1xxx 2xxx\n.....x3xxxx4xxxx5\n.....xxxx6xxxx7xx\n.....xx8xxxx9xxx \n " + ); +} + +fn overlay_text(text: &str, char_pos: usize, softwrap: bool, overlays: &[Overlay]) -> String { + DocumentFormatter::new_at_prev_checkpoint( + text.into(), + &TextFormat::new_test(softwrap), + TextAnnotations::default().add_overlay(overlays.into(), None), + char_pos, + ) + .0 + .collect_to_str() +} + +#[test] +fn overlay() { + assert_eq!( + overlay_text( + "foobar", + 0, + false, + &[ + Overlay { + char_idx: 0, + grapheme: "X".into(), + }, + Overlay { + char_idx: 2, + grapheme: "\t".into(), + }, + ] + ), + "Xo bar " + ); + assert_eq!( + overlay_text( + &"foo ".repeat(10), + 0, + true, + &[ + Overlay { + char_idx: 2, + grapheme: "\t".into(), + }, + Overlay { + char_idx: 5, + grapheme: "\t".into(), + }, + Overlay { + char_idx: 16, + grapheme: "X".into(), + }, + ] + ), + "fo f o foo \n.foo Xoo foo foo \n.foo foo foo " + ); +} + +fn annotate_text(text: &str, softwrap: bool, annotations: &[InlineAnnotation]) -> String { + DocumentFormatter::new_at_prev_checkpoint( + text.into(), + &TextFormat::new_test(softwrap), + TextAnnotations::default().add_inline_annotations(annotations.into(), None), + 0, + ) + .0 + .collect_to_str() +} + +#[test] +fn annotation() { + assert_eq!( + annotate_text( + "bar", + false, + &[InlineAnnotation { + char_idx: 0, + text: "foo".into(), + }] + ), + "foobar " + ); + assert_eq!( + annotate_text( + &"foo ".repeat(10), + true, + &[InlineAnnotation { + char_idx: 0, + text: "foo ".into(), + }] + ), + "foo foo foo foo \n.foo foo foo foo \n.foo foo foo " + ); +} +#[test] +fn annotation_and_overlay() { + assert_eq!( + DocumentFormatter::new_at_prev_checkpoint( + "bbar".into(), + &TextFormat::new_test(false), + TextAnnotations::default() + .add_inline_annotations( + Rc::new([InlineAnnotation { + char_idx: 0, + text: "fooo".into(), + }]), + None + ) + .add_overlay( + Rc::new([Overlay { + char_idx: 0, + grapheme: "\t".into(), + }]), + None + ), + 0, + ) + .0 + .collect_to_str(), + "fooo bar " + ); +} diff --git a/helix-core/src/graphemes.rs b/helix-core/src/graphemes.rs index 675f5750..15ef3eb0 100644 --- a/helix-core/src/graphemes.rs +++ b/helix-core/src/graphemes.rs @@ -5,7 +5,88 @@ use ropey::{iter::Chunks, str_utils::byte_to_char_idx, RopeSlice}; use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete}; use unicode_width::UnicodeWidthStr; -use std::fmt; +use std::borrow::Cow; +use std::fmt::{self, Debug, Display}; +use std::marker::PhantomData; +use std::ops::Deref; +use std::ptr::NonNull; +use std::{slice, str}; + +use crate::chars::{char_is_whitespace, char_is_word}; +use crate::LineEnding; + +#[inline] +pub fn tab_width_at(visual_x: usize, tab_width: u16) -> usize { + tab_width as usize - (visual_x % tab_width as usize) +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Grapheme<'a> { + Newline, + Tab { width: usize }, + Other { g: GraphemeStr<'a> }, +} + +impl<'a> Grapheme<'a> { + pub fn new(g: GraphemeStr<'a>, visual_x: usize, tab_width: u16) -> Grapheme<'a> { + match g { + g if g == "\t" => Grapheme::Tab { + width: tab_width_at(visual_x, tab_width), + }, + _ if LineEnding::from_str(&g).is_some() => Grapheme::Newline, + _ => Grapheme::Other { g }, + } + } + + pub fn change_position(&mut self, visual_x: usize, tab_width: u16) { + if let Grapheme::Tab { width } = self { + *width = tab_width_at(visual_x, tab_width) + } + } + + /// Returns the a visual width of this grapheme, + #[inline] + pub fn width(&self) -> usize { + match *self { + // width is not cached because we are dealing with + // ASCII almost all the time which already has a fastpath + // it's okay to convert to u16 here because no codepoint has a width larger + // than 2 and graphemes are usually atmost two visible codepoints wide + Grapheme::Other { ref g } => grapheme_width(g), + Grapheme::Tab { width } => width, + Grapheme::Newline => 1, + } + } + + pub fn is_whitespace(&self) -> bool { + !matches!(&self, Grapheme::Other { g } if !g.chars().all(char_is_whitespace)) + } + + // TODO currently word boundaries are used for softwrapping. + // This works best for programming languages and well for prose. + // This could however be improved in the future by considering unicode + // character classes but + pub fn is_word_boundary(&self) -> bool { + !matches!(&self, Grapheme::Other { g,.. } if g.chars().all(char_is_word)) + } +} + +impl Display for Grapheme<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + Grapheme::Newline => write!(f, " "), + Grapheme::Tab { width } => { + for _ in 0..width { + write!(f, " ")?; + } + Ok(()) + } + Grapheme::Other { ref g } => { + write!(f, "{g}") + } + } + } +} #[must_use] pub fn grapheme_width(g: &str) -> usize { @@ -27,6 +108,8 @@ pub fn grapheme_width(g: &str) -> usize { // We use max(1) here because all grapeheme clusters--even illformed // ones--should have at least some width so they can be edited // properly. + // TODO properly handle unicode width for all codepoints + // example of where unicode width is currently wrong: 🤦🏼‍♂️ (taken from https://hsivonen.fi/string-length/) UnicodeWidthStr::width(g).max(1) } } @@ -341,3 +424,101 @@ impl<'a> Iterator for RopeGraphemes<'a> { } } } + +/// A highly compressed Cow<'a, str> that holds +/// atmost u31::MAX bytes and is readonly +pub struct GraphemeStr<'a> { + ptr: NonNull, + len: u32, + phantom: PhantomData<&'a str>, +} + +impl GraphemeStr<'_> { + const MASK_OWNED: u32 = 1 << 31; + + fn compute_len(&self) -> usize { + (self.len & !Self::MASK_OWNED) as usize + } +} + +impl Deref for GraphemeStr<'_> { + type Target = str; + fn deref(&self) -> &Self::Target { + unsafe { + let bytes = slice::from_raw_parts(self.ptr.as_ptr(), self.compute_len()); + str::from_utf8_unchecked(bytes) + } + } +} + +impl Drop for GraphemeStr<'_> { + fn drop(&mut self) { + if self.len & Self::MASK_OWNED != 0 { + // free allocation + unsafe { + drop(Box::from_raw(slice::from_raw_parts_mut( + self.ptr.as_ptr(), + self.compute_len(), + ))); + } + } + } +} + +impl<'a> From<&'a str> for GraphemeStr<'a> { + fn from(g: &'a str) -> Self { + GraphemeStr { + ptr: unsafe { NonNull::new_unchecked(g.as_bytes().as_ptr() as *mut u8) }, + len: i32::try_from(g.len()).unwrap() as u32, + phantom: PhantomData, + } + } +} + +impl<'a> From for GraphemeStr<'a> { + fn from(g: String) -> Self { + let len = g.len(); + let ptr = Box::into_raw(g.into_bytes().into_boxed_slice()) as *mut u8; + GraphemeStr { + ptr: unsafe { NonNull::new_unchecked(ptr) }, + len: i32::try_from(len).unwrap() as u32, + phantom: PhantomData, + } + } +} + +impl<'a> From> for GraphemeStr<'a> { + fn from(g: Cow<'a, str>) -> Self { + match g { + Cow::Borrowed(g) => g.into(), + Cow::Owned(g) => g.into(), + } + } +} + +impl> PartialEq for GraphemeStr<'_> { + fn eq(&self, other: &T) -> bool { + self.deref() == other.deref() + } +} +impl PartialEq for GraphemeStr<'_> { + fn eq(&self, other: &str) -> bool { + self.deref() == other + } +} +impl Eq for GraphemeStr<'_> {} +impl Debug for GraphemeStr<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Debug::fmt(self.deref(), f) + } +} +impl Display for GraphemeStr<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Display::fmt(self.deref(), f) + } +} +impl Clone for GraphemeStr<'_> { + fn clone(&self) -> Self { + self.deref().to_owned().into() + } +} diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index ee174e69..e3f862a6 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -6,6 +6,7 @@ pub mod comment; pub mod config; pub mod diagnostic; pub mod diff; +pub mod doc_formatter; pub mod graphemes; pub mod history; pub mod increment; @@ -24,6 +25,7 @@ pub mod shellwords; pub mod surround; pub mod syntax; pub mod test; +pub mod text_annotations; pub mod textobject; mod transaction; pub mod wrap; @@ -95,8 +97,12 @@ pub use {regex, tree_sitter}; pub use graphemes::RopeGraphemes; pub use position::{ - coords_at_pos, pos_at_coords, pos_at_visual_coords, visual_coords_at_pos, Position, + char_idx_at_visual_offset, coords_at_pos, pos_at_coords, visual_offset_from_anchor, + visual_offset_from_block, Position, }; +#[allow(deprecated)] +pub use position::{pos_at_visual_coords, visual_coords_at_pos}; + pub use selection::{Range, Selection}; pub use smallvec::{smallvec, SmallVec}; pub use syntax::Syntax; diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index 278375e8..11c12a6f 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -4,16 +4,19 @@ use ropey::iter::Chars; use tree_sitter::{Node, QueryCursor}; use crate::{ + char_idx_at_visual_offset, chars::{categorize_char, char_is_line_ending, CharCategory}, + doc_formatter::TextFormat, graphemes::{ next_grapheme_boundary, nth_next_grapheme_boundary, nth_prev_grapheme_boundary, prev_grapheme_boundary, }, line_ending::rope_is_line_ending, - pos_at_visual_coords, + position::char_idx_at_visual_block_offset, syntax::LanguageConfiguration, + text_annotations::TextAnnotations, textobject::TextObject, - visual_coords_at_pos, Position, Range, RopeSlice, + visual_offset_from_block, Range, RopeSlice, }; #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -34,7 +37,8 @@ pub fn move_horizontally( dir: Direction, count: usize, behaviour: Movement, - _: usize, + _: &TextFormat, + _: &mut TextAnnotations, ) -> Range { let pos = range.cursor(slice); @@ -48,35 +52,116 @@ pub fn move_horizontally( range.put_cursor(slice, new_pos, behaviour == Movement::Extend) } +pub fn move_vertically_visual( + slice: RopeSlice, + range: Range, + dir: Direction, + count: usize, + behaviour: Movement, + text_fmt: &TextFormat, + annotations: &mut TextAnnotations, +) -> Range { + if !text_fmt.soft_wrap { + move_vertically(slice, range, dir, count, behaviour, text_fmt, annotations); + } + annotations.clear_line_annotations(); + let pos = range.cursor(slice); + + // Compute the current position's 2d coordinates. + let (visual_pos, block_off) = visual_offset_from_block(slice, pos, pos, text_fmt, annotations); + let new_col = range + .old_visual_position + .map_or(visual_pos.col as u32, |(_, col)| col); + + // Compute the new position. + let mut row_off = match dir { + Direction::Forward => count as isize, + Direction::Backward => -(count as isize), + }; + + // TODO how to handle inline annotations that span an entire visual line (very unlikely). + + // Compute visual offset relative to block start to avoid trasversing the block twice + row_off += visual_pos.row as isize; + let new_pos = char_idx_at_visual_offset( + slice, + block_off, + row_off, + new_col as usize, + text_fmt, + annotations, + ) + .0; + + // Special-case to avoid moving to the end of the last non-empty line. + if behaviour == Movement::Extend && slice.line(slice.char_to_line(new_pos)).len_chars() == 0 { + return range; + } + + let mut new_range = range.put_cursor(slice, new_pos, behaviour == Movement::Extend); + new_range.old_visual_position = Some((0, new_col)); + new_range +} + pub fn move_vertically( slice: RopeSlice, range: Range, dir: Direction, count: usize, behaviour: Movement, - tab_width: usize, + text_fmt: &TextFormat, + annotations: &mut TextAnnotations, ) -> Range { + annotations.clear_line_annotations(); let pos = range.cursor(slice); + let line_idx = slice.char_to_line(pos); + let line_start = slice.line_to_char(line_idx); // Compute the current position's 2d coordinates. - let Position { row, col } = visual_coords_at_pos(slice, pos, tab_width); - let horiz = range.horiz.unwrap_or(col as u32); + let visual_pos = visual_offset_from_block(slice, line_start, pos, text_fmt, annotations).0; + let (mut new_row, new_col) = range + .old_visual_position + .map_or((visual_pos.row as u32, visual_pos.col as u32), |pos| pos); + new_row = new_row.max(visual_pos.row as u32); + let line_idx = slice.char_to_line(pos); // Compute the new position. - let new_row = match dir { - Direction::Forward => (row + count).min(slice.len_lines().saturating_sub(1)), - Direction::Backward => row.saturating_sub(count), + let mut new_line_idx = match dir { + Direction::Forward => line_idx.saturating_add(count), + Direction::Backward => line_idx.saturating_sub(count), }; - let new_col = col.max(horiz as usize); - let new_pos = pos_at_visual_coords(slice, Position::new(new_row, new_col), tab_width); + + let line = if new_line_idx >= slice.len_lines() - 1 { + // there is no line terminator for the last line + // so the logic below is not necessary here + new_line_idx = slice.len_lines() - 1; + slice + } else { + // char_idx_at_visual_block_offset returns a one-past-the-end index + // in case it reaches the end of the slice + // to avoid moving to the nextline in that case the line terminator is removed from the line + let new_line_end = prev_grapheme_boundary(slice, slice.line_to_char(new_line_idx + 1)); + slice.slice(..new_line_end) + }; + + let new_line_start = line.line_to_char(new_line_idx); + + let (new_pos, _) = char_idx_at_visual_block_offset( + line, + new_line_start, + new_row as usize, + new_col as usize, + text_fmt, + annotations, + ); // Special-case to avoid moving to the end of the last non-empty line. - if behaviour == Movement::Extend && slice.line(new_row).len_chars() == 0 { + if behaviour == Movement::Extend && slice.line(new_line_idx).len_chars() == 0 { return range; } let mut new_range = range.put_cursor(slice, new_pos, behaviour == Movement::Extend); - new_range.horiz = Some(horiz); + new_range.old_visual_position = Some((new_row, new_col)); new_range } @@ -473,7 +558,16 @@ mod test { assert_eq!( coords_at_pos( slice, - move_vertically(slice, range, Direction::Forward, 1, Movement::Move, 4).head + move_vertically_visual( + slice, + range, + Direction::Forward, + 1, + Movement::Move, + &TextFormat::default(), + &mut TextAnnotations::default(), + ) + .head ), (1, 3).into() ); @@ -497,7 +591,15 @@ mod test { ]; for ((direction, amount), coordinates) in moves_and_expected_coordinates { - range = move_horizontally(slice, range, direction, amount, Movement::Move, 0); + range = move_horizontally( + slice, + range, + direction, + amount, + Movement::Move, + &TextFormat::default(), + &mut TextAnnotations::default(), + ); assert_eq!(coords_at_pos(slice, range.head), coordinates.into()) } } @@ -523,7 +625,15 @@ mod test { ]; for ((direction, amount), coordinates) in moves_and_expected_coordinates { - range = move_horizontally(slice, range, direction, amount, Movement::Move, 0); + range = move_horizontally( + slice, + range, + direction, + amount, + Movement::Move, + &TextFormat::default(), + &mut TextAnnotations::default(), + ); assert_eq!(coords_at_pos(slice, range.head), coordinates.into()); assert_eq!(range.head, range.anchor); } @@ -545,7 +655,15 @@ mod test { ]; for (direction, amount) in moves { - range = move_horizontally(slice, range, direction, amount, Movement::Extend, 0); + range = move_horizontally( + slice, + range, + direction, + amount, + Movement::Extend, + &TextFormat::default(), + &mut TextAnnotations::default(), + ); assert_eq!(range.anchor, original_anchor); } } @@ -569,7 +687,15 @@ mod test { ]; for ((direction, amount), coordinates) in moves_and_expected_coordinates { - range = move_vertically(slice, range, direction, amount, Movement::Move, 4); + range = move_vertically_visual( + slice, + range, + direction, + amount, + Movement::Move, + &TextFormat::default(), + &mut TextAnnotations::default(), + ); assert_eq!(coords_at_pos(slice, range.head), coordinates.into()); assert_eq!(range.head, range.anchor); } @@ -603,8 +729,24 @@ mod test { for ((axis, direction, amount), coordinates) in moves_and_expected_coordinates { range = match axis { - Axis::H => move_horizontally(slice, range, direction, amount, Movement::Move, 0), - Axis::V => move_vertically(slice, range, direction, amount, Movement::Move, 4), + Axis::H => move_horizontally( + slice, + range, + direction, + amount, + Movement::Move, + &TextFormat::default(), + &mut TextAnnotations::default(), + ), + Axis::V => move_vertically_visual( + slice, + range, + direction, + amount, + Movement::Move, + &TextFormat::default(), + &mut TextAnnotations::default(), + ), }; assert_eq!(coords_at_pos(slice, range.head), coordinates.into()); assert_eq!(range.head, range.anchor); @@ -638,8 +780,24 @@ mod test { for ((axis, direction, amount), coordinates) in moves_and_expected_coordinates { range = match axis { - Axis::H => move_horizontally(slice, range, direction, amount, Movement::Move, 0), - Axis::V => move_vertically(slice, range, direction, amount, Movement::Move, 4), + Axis::H => move_horizontally( + slice, + range, + direction, + amount, + Movement::Move, + &TextFormat::default(), + &mut TextAnnotations::default(), + ), + Axis::V => move_vertically_visual( + slice, + range, + direction, + amount, + Movement::Move, + &TextFormat::default(), + &mut TextAnnotations::default(), + ), }; assert_eq!(coords_at_pos(slice, range.head), coordinates.into()); assert_eq!(range.head, range.anchor); diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs index f456eb98..7b8dc326 100644 --- a/helix-core/src/position.rs +++ b/helix-core/src/position.rs @@ -1,9 +1,11 @@ -use std::borrow::Cow; +use std::{borrow::Cow, cmp::Ordering}; use crate::{ chars::char_is_line_ending, + doc_formatter::{DocumentFormatter, TextFormat}, graphemes::{ensure_grapheme_boundary_prev, grapheme_width, RopeGraphemes}, line_ending::line_end_char_index, + text_annotations::TextAnnotations, RopeSlice, }; @@ -73,6 +75,13 @@ pub fn coords_at_pos(text: RopeSlice, pos: usize) -> Position { /// Takes \t, double-width characters (CJK) into account as well as text /// not in the document in the future. /// See [`coords_at_pos`] for an "objective" one. +/// +/// This function should be used very rarely. Usually `visual_offset_from_anchor` +/// or `visual_offset_from_block` is preferable. However when you want to compute the +/// actual visual row/column in the text (not what is actually shown on screen) +/// then you should use this function. For example aligning text should ignore virtual +/// text and softwrap. +#[deprecated = "Doesn't account for softwrap or decorations, use visual_offset_from_anchor instead"] pub fn visual_coords_at_pos(text: RopeSlice, pos: usize, tab_width: usize) -> Position { let line = text.char_to_line(pos); @@ -93,6 +102,82 @@ pub fn visual_coords_at_pos(text: RopeSlice, pos: usize, tab_width: usize) -> Po Position::new(line, col) } +/// Returns the visual offset from the start of the first visual line +/// in the block that contains anchor. +/// Text is always wrapped at blocks, they usually correspond to +/// actual line breaks but for very long lines +/// softwrapping positions are estimated with an O(1) algorithm +/// to ensure consistent performance for large lines (currently unimplemented) +/// +/// Usualy you want to use `visual_offset_from_anchor` instead but this function +/// can be useful (and faster) if +/// * You already know the visual position of the block +/// * You only care about the horizontal offset (column) and not the vertical offset (row) +pub fn visual_offset_from_block( + text: RopeSlice, + anchor: usize, + pos: usize, + text_fmt: &TextFormat, + annotations: &TextAnnotations, +) -> (Position, usize) { + let mut last_pos = Position::default(); + let (formatter, block_start) = + DocumentFormatter::new_at_prev_checkpoint(text, text_fmt, annotations, anchor); + let mut char_pos = block_start; + + for (grapheme, vpos) in formatter { + last_pos = vpos; + char_pos += grapheme.doc_chars(); + + if char_pos > pos { + return (last_pos, block_start); + } + } + + (last_pos, block_start) +} + +/// Returns the visual offset from the start of the visual line +/// that contains anchor. +pub fn visual_offset_from_anchor( + text: RopeSlice, + anchor: usize, + pos: usize, + text_fmt: &TextFormat, + annotations: &TextAnnotations, + max_rows: usize, +) -> Option<(Position, usize)> { + let (formatter, block_start) = + DocumentFormatter::new_at_prev_checkpoint(text, text_fmt, annotations, anchor); + let mut char_pos = block_start; + let mut anchor_line = None; + let mut last_pos = Position::default(); + + for (grapheme, vpos) in formatter { + last_pos = vpos; + char_pos += grapheme.doc_chars(); + + if char_pos > anchor && anchor_line.is_none() { + anchor_line = Some(last_pos.row); + } + if char_pos > pos { + last_pos.row -= anchor_line.unwrap(); + return Some((last_pos, block_start)); + } + + if let Some(anchor_line) = anchor_line { + if vpos.row >= anchor_line + max_rows { + return None; + } + } + } + + let anchor_line = anchor_line.unwrap_or(last_pos.row); + last_pos.row -= anchor_line; + + Some((last_pos, block_start)) +} + /// Convert (line, column) coordinates to a character index. /// /// If the `line` coordinate is beyond the end of the file, the EOF @@ -140,6 +225,11 @@ pub fn pos_at_coords(text: RopeSlice, coords: Position, limit_before_line_ending /// If the `column` coordinate is past the end of the given line, the /// line-end position (in this case, just before the line ending /// character) will be returned. +/// This function should be used very rarely. Usually `char_idx_at_visual_offset` is preferable. +/// However when you want to compute a char position from the visual row/column in the text +/// (not what is actually shown on screen) then you should use this function. +/// For example aligning text should ignore virtual text and softwrap. +#[deprecated = "Doesn't account for softwrap or decorations, use char_idx_at_visual_offset instead"] pub fn pos_at_visual_coords(text: RopeSlice, coords: Position, tab_width: usize) -> usize { let Position { mut row, col } = coords; row = row.min(text.len_lines() - 1); @@ -169,6 +259,120 @@ pub fn pos_at_visual_coords(text: RopeSlice, coords: Position, tab_width: usize) line_start + col_char_offset } +/// Returns the char index on the visual line `row_offset` below the visual line of +/// the provided char index `anchor` that is closest to the supplied visual `column`. +/// +/// If the targeted visual line is entirely covered by virtual text the last +/// char position before the virtual text and a virtual offset is returned instead. +/// +/// If no (text) grapheme starts at exactly at the specified column the +/// start of the grapheme to the left is returned. If there is no grapheme +/// to the left (for example if the line starts with virtual text) then the positiong +/// of the next grapheme to the right is returned. +/// +/// If the `line` coordinate is beyond the end of the file, the EOF +/// position will be returned. +/// +/// If the `column` coordinate is past the end of the given line, the +/// line-end position (in this case, just before the line ending +/// character) will be returned. +/// +/// # Returns +/// +/// `(real_char_idx, virtual_lines)` +/// +/// The nearest character idx "closest" (see above) to the specified visual offset +/// on the visual line is returned if the visual line contains any text: +/// If the visual line at the specified offset is a virtual line generated by a `LineAnnotation` +/// the previous char_index is returned, together with the remaining vertical offset (`virtual_lines`) +pub fn char_idx_at_visual_offset<'a>( + text: RopeSlice<'a>, + mut anchor: usize, + mut row_offset: isize, + column: usize, + text_fmt: &TextFormat, + annotations: &TextAnnotations, +) -> (usize, usize) { + // convert row relative to visual line containing anchor to row relative to a block containing anchor (anchor may change) + loop { + let (visual_pos_in_block, block_char_offset) = + visual_offset_from_block(text, anchor, anchor, text_fmt, annotations); + row_offset += visual_pos_in_block.row as isize; + anchor = block_char_offset; + if row_offset >= 0 { + break; + } + + if block_char_offset == 0 { + row_offset = 0; + break; + } + // the row_offset is negative so we need to look at the previous block + // set the anchor to the last char before the current block + // this char index is also always a line earlier so increase the row_offset by 1 + anchor -= 1; + row_offset += 1; + } + + char_idx_at_visual_block_offset( + text, + anchor, + row_offset as usize, + column, + text_fmt, + annotations, + ) +} + +/// This function behaves the same as `char_idx_at_visual_offset`, except that +/// the vertical offset `row` is always computed relative to the block that contains `anchor` +/// instead of the visual line that contains `anchor`. +/// Usually `char_idx_at_visual_offset` is more useful but this function can be +/// used in some situations as an optimization when `visual_offset_from_block` was used +/// +/// # Returns +/// +/// `(real_char_idx, virtual_lines)` +/// +/// See `char_idx_at_visual_offset` for details +pub fn char_idx_at_visual_block_offset( + text: RopeSlice, + anchor: usize, + row: usize, + column: usize, + text_fmt: &TextFormat, + annotations: &TextAnnotations, +) -> (usize, usize) { + let (formatter, mut char_idx) = + DocumentFormatter::new_at_prev_checkpoint(text, text_fmt, annotations, anchor); + let mut last_char_idx = char_idx; + let mut last_char_idx_on_line = None; + let mut last_row = 0; + for (grapheme, grapheme_pos) in formatter { + match grapheme_pos.row.cmp(&row) { + Ordering::Equal => { + if grapheme_pos.col + grapheme.width() > column { + if !grapheme.is_virtual() { + return (char_idx, 0); + } else if let Some(char_idx) = last_char_idx_on_line { + return (char_idx, 0); + } + } else if !grapheme.is_virtual() { + last_char_idx_on_line = Some(char_idx) + } + } + Ordering::Greater => return (last_char_idx, row - last_row), + _ => (), + } + + last_char_idx = char_idx; + last_row = grapheme_pos.row; + char_idx += grapheme.doc_chars(); + } + + (char_idx, 0) +} + #[cfg(test)] mod test { use super::*; @@ -228,6 +432,7 @@ mod test { } #[test] + #[allow(deprecated)] fn test_visual_coords_at_pos() { let text = Rope::from("ḧëḷḷö\nẅöṛḷḋ"); let slice = text.slice(..); @@ -275,6 +480,130 @@ mod test { assert_eq!(visual_coords_at_pos(slice, 2, 8), (0, 9).into()); } + #[test] + fn test_visual_off_from_block() { + let text = Rope::from("ḧëḷḷö\nẅöṛḷḋ"); + let slice = text.slice(..); + let annot = TextAnnotations::default(); + let text_fmt = TextFormat::default(); + assert_eq!( + visual_offset_from_block(slice, 0, 0, &text_fmt, &annot).0, + (0, 0).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 5, &text_fmt, &annot).0, + (0, 5).into() + ); // position on \n + assert_eq!( + visual_offset_from_block(slice, 0, 6, &text_fmt, &annot).0, + (1, 0).into() + ); // position on w + assert_eq!( + visual_offset_from_block(slice, 0, 7, &text_fmt, &annot).0, + (1, 1).into() + ); // position on o + assert_eq!( + visual_offset_from_block(slice, 0, 10, &text_fmt, &annot).0, + (1, 4).into() + ); // position on d + + // Test with wide characters. + let text = Rope::from("今日はいい\n"); + let slice = text.slice(..); + assert_eq!( + visual_offset_from_block(slice, 0, 0, &text_fmt, &annot).0, + (0, 0).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 1, &text_fmt, &annot).0, + (0, 2).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 2, &text_fmt, &annot).0, + (0, 4).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 3, &text_fmt, &annot).0, + (0, 6).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 4, &text_fmt, &annot).0, + (0, 8).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 5, &text_fmt, &annot).0, + (0, 10).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 6, &text_fmt, &annot).0, + (1, 0).into() + ); + + // Test with grapheme clusters. + let text = Rope::from("a̐éö̲\r\n"); + let slice = text.slice(..); + assert_eq!( + visual_offset_from_block(slice, 0, 0, &text_fmt, &annot).0, + (0, 0).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 2, &text_fmt, &annot).0, + (0, 1).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 4, &text_fmt, &annot).0, + (0, 2).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 7, &text_fmt, &annot).0, + (0, 3).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 9, &text_fmt, &annot).0, + (1, 0).into() + ); + + // Test with wide-character grapheme clusters. + // TODO: account for cluster. + let text = Rope::from("किमपि\n"); + let slice = text.slice(..); + assert_eq!( + visual_offset_from_block(slice, 0, 0, &text_fmt, &annot).0, + (0, 0).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 2, &text_fmt, &annot).0, + (0, 2).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 3, &text_fmt, &annot).0, + (0, 3).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 5, &text_fmt, &annot).0, + (0, 5).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 6, &text_fmt, &annot).0, + (1, 0).into() + ); + + // Test with tabs. + let text = Rope::from("\tHello\n"); + let slice = text.slice(..); + assert_eq!( + visual_offset_from_block(slice, 0, 0, &text_fmt, &annot).0, + (0, 0).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 1, &text_fmt, &annot).0, + (0, 4).into() + ); + assert_eq!( + visual_offset_from_block(slice, 0, 2, &text_fmt, &annot).0, + (0, 5).into() + ); + } #[test] fn test_pos_at_coords() { let text = Rope::from("ḧëḷḷö\nẅöṛḷḋ"); @@ -341,6 +670,7 @@ mod test { } #[test] + #[allow(deprecated)] fn test_pos_at_visual_coords() { let text = Rope::from("ḧëḷḷö\nẅöṛḷḋ"); let slice = text.slice(..); @@ -405,4 +735,100 @@ mod test { assert_eq!(pos_at_visual_coords(slice, (0, 10).into(), 4), 0); assert_eq!(pos_at_visual_coords(slice, (10, 10).into(), 4), 0); } + + #[test] + fn test_char_idx_at_visual_row_offset() { + let text = Rope::from("ḧëḷḷö\nẅöṛḷḋ\nfoo"); + let slice = text.slice(..); + let mut text_fmt = TextFormat::default(); + for i in 0isize..3isize { + for j in -2isize..=2isize { + if !(0..3).contains(&(i + j)) { + continue; + } + println!("{i} {j}"); + assert_eq!( + char_idx_at_visual_offset( + slice, + slice.line_to_char(i as usize), + j, + 3, + &text_fmt, + &TextAnnotations::default(), + ) + .0, + slice.line_to_char((i + j) as usize) + 3 + ); + } + } + + text_fmt.soft_wrap = true; + let mut softwrapped_text = "foo ".repeat(10); + softwrapped_text.push('\n'); + let last_char = softwrapped_text.len() - 1; + + let text = Rope::from(softwrapped_text.repeat(3)); + let slice = text.slice(..); + assert_eq!( + char_idx_at_visual_offset( + slice, + last_char, + 0, + 0, + &text_fmt, + &TextAnnotations::default(), + ) + .0, + 32 + ); + assert_eq!( + char_idx_at_visual_offset( + slice, + last_char, + -1, + 0, + &text_fmt, + &TextAnnotations::default(), + ) + .0, + 16 + ); + assert_eq!( + char_idx_at_visual_offset( + slice, + last_char, + -2, + 0, + &text_fmt, + &TextAnnotations::default(), + ) + .0, + 0 + ); + assert_eq!( + char_idx_at_visual_offset( + slice, + softwrapped_text.len() + last_char, + -2, + 0, + &text_fmt, + &TextAnnotations::default(), + ) + .0, + softwrapped_text.len() + ); + + assert_eq!( + char_idx_at_visual_offset( + slice, + softwrapped_text.len() + last_char, + -5, + 0, + &text_fmt, + &TextAnnotations::default(), + ) + .0, + 0 + ); + } } diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs index ffba46ab..7817618f 100644 --- a/helix-core/src/selection.rs +++ b/helix-core/src/selection.rs @@ -53,7 +53,9 @@ pub struct Range { pub anchor: usize, /// The head of the range, moved when extending. pub head: usize, - pub horiz: Option, + /// The previous visual offset (softwrapped lines and columns) from + /// the start of the line + pub old_visual_position: Option<(u32, u32)>, } impl Range { @@ -61,7 +63,7 @@ impl Range { Self { anchor, head, - horiz: None, + old_visual_position: None, } } @@ -127,7 +129,7 @@ impl Range { Self { anchor: self.head, head: self.anchor, - horiz: self.horiz, + old_visual_position: self.old_visual_position, } } @@ -185,7 +187,7 @@ impl Range { Self { anchor, head, - horiz: None, + old_visual_position: None, } } @@ -198,13 +200,13 @@ impl Range { Self { anchor: self.anchor.min(from), head: self.head.max(to), - horiz: None, + old_visual_position: None, } } else { Self { anchor: self.anchor.max(to), head: self.head.min(from), - horiz: None, + old_visual_position: None, } } } @@ -219,13 +221,13 @@ impl Range { Range { anchor: self.anchor.max(other.anchor), head: self.head.min(other.head), - horiz: None, + old_visual_position: None, } } else { Range { anchor: self.from().min(other.from()), head: self.to().max(other.to()), - horiz: None, + old_visual_position: None, } } } @@ -279,8 +281,8 @@ impl Range { Range { anchor: new_anchor, head: new_head, - horiz: if new_anchor == self.anchor { - self.horiz + old_visual_position: if new_anchor == self.anchor { + self.old_visual_position } else { None }, @@ -306,7 +308,7 @@ impl Range { Range { anchor: self.anchor, head: next_grapheme_boundary(slice, self.head), - horiz: self.horiz, + old_visual_position: self.old_visual_position, } } else { *self @@ -378,7 +380,7 @@ impl From<(usize, usize)> for Range { Self { anchor, head, - horiz: None, + old_visual_position: None, } } } @@ -482,7 +484,7 @@ impl Selection { ranges: smallvec![Range { anchor, head, - horiz: None + old_visual_position: None }], primary_index: 0, } @@ -566,9 +568,9 @@ impl Selection { } /// Takes a closure and maps each `Range` over the closure. - pub fn transform(mut self, f: F) -> Self + pub fn transform(mut self, mut f: F) -> Self where - F: Fn(Range) -> Range, + F: FnMut(Range) -> Range, { for range in self.ranges.iter_mut() { *range = f(*range) diff --git a/helix-core/src/text_annotations.rs b/helix-core/src/text_annotations.rs new file mode 100644 index 00000000..1956f6b5 --- /dev/null +++ b/helix-core/src/text_annotations.rs @@ -0,0 +1,271 @@ +use std::cell::Cell; +use std::convert::identity; +use std::ops::Range; +use std::rc::Rc; + +use crate::syntax::Highlight; +use crate::Tendril; + +/// An inline annotation is continuous text shown +/// on the screen before the grapheme that starts at +/// `char_idx` +#[derive(Debug, Clone)] +pub struct InlineAnnotation { + pub text: Tendril, + pub char_idx: usize, +} + +/// Represents a **single Grapheme** that is part of the document +/// that start at `char_idx` that will be replaced with +/// a different `grapheme`. +/// If `grapheme` contains multiple graphemes the text +/// will render incorrectly. +/// If you want to overlay multiple graphemes simply +/// use multiple `Overlays`. +/// +/// # Examples +/// +/// The following examples are valid overlays for the following text: +/// +/// `aX͎̊͢͜͝͡bc` +/// +/// ``` +/// use helix_core::text_annotations::Overlay; +/// +/// // replaces a +/// Overlay { +/// char_idx: 0, +/// grapheme: "X".into(), +/// }; +/// +/// // replaces X͎̊͢͜͝͡ +/// Overlay{ +/// char_idx: 1, +/// grapheme: "\t".into(), +/// }; +/// +/// // replaces b +/// Overlay{ +/// char_idx: 6, +/// grapheme: "X̢̢̟͖̲͌̋̇͑͝".into(), +/// }; +/// ``` +/// +/// The following examples are invalid uses +/// +/// ``` +/// use helix_core::text_annotations::Overlay; +/// +/// // overlay is not aligned at grapheme boundary +/// Overlay{ +/// char_idx: 3, +/// grapheme: "x".into(), +/// }; +/// +/// // overlay contains multiple graphemes +/// Overlay{ +/// char_idx: 0, +/// grapheme: "xy".into(), +/// }; +/// ``` +#[derive(Debug, Clone)] +pub struct Overlay { + pub char_idx: usize, + pub grapheme: Tendril, +} + +/// Line annotations allow for virtual text between normal +/// text lines. They cause `height` empty lines to be inserted +/// below the document line that contains `anchor_char_idx`. +/// +/// These lines can be filled with text in the rendering code +/// as their contents have no effect beyond visual appearance. +/// +/// To insert a line after a document line simply set +/// `anchor_char_idx` to `doc.line_to_char(line_idx)` +#[derive(Debug, Clone)] +pub struct LineAnnotation { + pub anchor_char_idx: usize, + pub height: usize, +} + +#[derive(Debug)] +struct Layer { + annotations: Rc<[A]>, + current_index: Cell, + metadata: M, +} + +impl Clone for Layer { + fn clone(&self) -> Self { + Layer { + annotations: self.annotations.clone(), + current_index: self.current_index.clone(), + metadata: self.metadata.clone(), + } + } +} + +impl Layer { + pub fn reset_pos(&self, char_idx: usize, get_char_idx: impl Fn(&A) -> usize) { + let new_index = self + .annotations + .binary_search_by_key(&char_idx, get_char_idx) + .unwrap_or_else(identity); + + self.current_index.set(new_index); + } + + pub fn consume(&self, char_idx: usize, get_char_idx: impl Fn(&A) -> usize) -> Option<&A> { + let annot = self.annotations.get(self.current_index.get())?; + debug_assert!(get_char_idx(annot) >= char_idx); + if get_char_idx(annot) == char_idx { + self.current_index.set(self.current_index.get() + 1); + Some(annot) + } else { + None + } + } +} + +impl From<(Rc<[A]>, M)> for Layer { + fn from((annotations, metadata): (Rc<[A]>, M)) -> Layer { + Layer { + annotations, + current_index: Cell::new(0), + metadata, + } + } +} + +fn reset_pos(layers: &[Layer], pos: usize, get_pos: impl Fn(&A) -> usize) { + for layer in layers { + layer.reset_pos(pos, &get_pos) + } +} + +/// Annotations that change that is displayed when the document is render. +/// Also commonly called virtual text. +#[derive(Default, Debug, Clone)] +pub struct TextAnnotations { + inline_annotations: Vec>>, + overlays: Vec>>, + line_annotations: Vec>, +} + +impl TextAnnotations { + /// Prepare the TextAnnotations for iteration starting at char_idx + pub fn reset_pos(&self, char_idx: usize) { + reset_pos(&self.inline_annotations, char_idx, |annot| annot.char_idx); + reset_pos(&self.overlays, char_idx, |annot| annot.char_idx); + reset_pos(&self.line_annotations, char_idx, |annot| { + annot.anchor_char_idx + }); + } + + pub fn collect_overlay_highlights( + &self, + char_range: Range, + ) -> Vec<(usize, Range)> { + let mut highlights = Vec::new(); + self.reset_pos(char_range.start); + for char_idx in char_range { + if let Some((_, Some(highlight))) = self.overlay_at(char_idx) { + // we don't know the number of chars the original grapheme takes + // however it doesn't matter as highlight bounderies are automatically + // aligned to grapheme boundaries in the rendering code + highlights.push((highlight.0, char_idx..char_idx + 1)) + } + } + + highlights + } + + /// Add new inline annotations. + /// + /// The annotations grapheme will be rendered with `highlight` + /// patched on top of `ui.text`. + /// + /// The annotations **must be sorted** by their `char_idx`. + /// Multiple annotations with the same `char_idx` are allowed, + /// they will be display in the order that they are present in the layer. + /// + /// If multiple layers contain annotations at the same position + /// the annotations that belong to the layers added first will be shown first. + pub fn add_inline_annotations( + &mut self, + layer: Rc<[InlineAnnotation]>, + highlight: Option, + ) -> &mut Self { + self.inline_annotations.push((layer, highlight).into()); + self + } + + /// Add new grapheme overlays. + /// + /// The overlayed grapheme will be rendered with `highlight` + /// patched on top of `ui.text`. + /// + /// The overlays **must be sorted** by their `char_idx`. + /// Multiple overlays with the same `char_idx` **are allowed**. + /// + /// If multiple layers contain overlay at the same position + /// the overlay from the layer added last will be show. + pub fn add_overlay(&mut self, layer: Rc<[Overlay]>, highlight: Option) -> &mut Self { + self.overlays.push((layer, highlight).into()); + self + } + + /// Add new annotation lines. + /// + /// The line annotations **must be sorted** by their `char_idx`. + /// Multiple line annotations with the same `char_idx` **are not allowed**. + pub fn add_line_annotation(&mut self, layer: Rc<[LineAnnotation]>) -> &mut Self { + self.line_annotations.push((layer, ()).into()); + self + } + + /// Removes all line annotations, useful for vertical motions + /// so that virtual text lines are automatically skipped. + pub fn clear_line_annotations(&mut self) { + self.line_annotations.clear(); + } + + pub(crate) fn next_inline_annotation_at( + &self, + char_idx: usize, + ) -> Option<(&InlineAnnotation, Option)> { + self.inline_annotations.iter().find_map(|layer| { + let annotation = layer.consume(char_idx, |annot| annot.char_idx)?; + Some((annotation, layer.metadata)) + }) + } + + pub(crate) fn overlay_at(&self, char_idx: usize) -> Option<(&Overlay, Option)> { + let mut overlay = None; + for layer in &self.overlays { + while let Some(new_overlay) = layer.consume(char_idx, |annot| annot.char_idx) { + overlay = Some((new_overlay, layer.metadata)); + } + } + overlay + } + + pub(crate) fn annotation_lines_at(&self, char_idx: usize) -> usize { + self.line_annotations + .iter() + .map(|layer| { + let mut lines = 0; + while let Some(annot) = layer.annotations.get(layer.current_index.get()) { + if annot.anchor_char_idx == char_idx { + layer.current_index.set(layer.current_index.get() + 1); + lines += annot.height + } else { + break; + } + } + lines + }) + .sum() + } +} -- cgit v1.2.3-70-g09d2