diff options
author | Pascal Kuthe | 2023-01-31 17:03:19 +0000 |
---|---|---|
committer | GitHub | 2023-01-31 17:03:19 +0000 |
commit | 4dcf1fe66ba30a78edc054780d9b65c2f826530f (patch) | |
tree | ffb84ea94f07ceb52494a955b1bd78f115395dc0 /helix-core/src/graphemes.rs | |
parent | 4eca4b3079bf53de874959270d0b3471d320debc (diff) |
rework positioning/rendering and enable softwrap/virtual text (#5420)
* rework positioning/rendering, enables softwrap/virtual text
This commit is a large rework of the core text positioning and
rendering code in helix to remove the assumption that on-screen
columns/lines correspond to text columns/lines.
A generic `DocFormatter` is introduced that positions graphemes on
and is used both for rendering and for movements/scrolling.
Both virtual text support (inline, grapheme overlay and multi-line)
and a capable softwrap implementation is included.
fix picker highlight
cleanup doc formatter, use word bondaries for wrapping
make visual vertical movement a seperate commnad
estimate line gutter width to improve performance
cache cursor position
cleanup and optimize doc formatter
cleanup documentation
fix typos
Co-authored-by: Daniel Hines <d4hines@gmail.com>
update documentation
fix panic in last_visual_line funciton
improve soft-wrap documentation
add extend_visual_line_up/down commands
fix non-visual vertical movement
streamline virtual text highlighting, add softwrap indicator
fix cursor position if softwrap is disabled
improve documentation of text_annotations module
avoid crashes if view anchor is out of bounds
fix: consider horizontal offset when traslation char_idx -> vpos
improve default configuration
fix: mixed up horizontal and vertical offset
reset view position after config reload
apply suggestions from review
disabled softwrap for very small screens to avoid endless spin
fix wrap_indicator setting
fix bar cursor disappearring on the EOF character
add keybinding for linewise vertical movement
fix: inconsistent gutter highlights
improve virtual text API
make scope idx lookup more ergonomic
allow overlapping overlays
correctly track char_pos for virtual text
adjust configuration
deprecate old position fucntions
fix infinite loop in highlight lookup
fix gutter style
fix formatting
document max-line-width interaction with softwrap
change wrap-indicator example to use empty string
fix: rare panic when view is in invalid state (bis)
* Apply suggestions from code review
Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
* improve documentation for positoning functions
* simplify tests
* fix documentation of Grapheme::width
* Apply suggestions from code review
Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
* add explicit drop invocation
* Add explicit MoveFn type alias
* add docuntation to Editor::cursor_cache
* fix a few typos
* explain use of allow(deprecated)
* make gj and gk extend in select mode
* remove unneded debug and TODO
* mark tab_width_at #[inline]
* add fast-path to move_vertically_visual in case softwrap is disabled
* rename first_line to first_visual_line
* simplify duplicate if/else
---------
Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
Diffstat (limited to 'helix-core/src/graphemes.rs')
-rw-r--r-- | helix-core/src/graphemes.rs | 183 |
1 files changed, 182 insertions, 1 deletions
diff --git a/helix-core/src/graphemes.rs b/helix-core/src/graphemes.rs index 675f5750..15ef3eb0 100644 --- a/helix-core/src/graphemes.rs +++ b/helix-core/src/graphemes.rs @@ -5,7 +5,88 @@ use ropey::{iter::Chunks, str_utils::byte_to_char_idx, RopeSlice}; use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete}; use unicode_width::UnicodeWidthStr; -use std::fmt; +use std::borrow::Cow; +use std::fmt::{self, Debug, Display}; +use std::marker::PhantomData; +use std::ops::Deref; +use std::ptr::NonNull; +use std::{slice, str}; + +use crate::chars::{char_is_whitespace, char_is_word}; +use crate::LineEnding; + +#[inline] +pub fn tab_width_at(visual_x: usize, tab_width: u16) -> usize { + tab_width as usize - (visual_x % tab_width as usize) +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Grapheme<'a> { + Newline, + Tab { width: usize }, + Other { g: GraphemeStr<'a> }, +} + +impl<'a> Grapheme<'a> { + pub fn new(g: GraphemeStr<'a>, visual_x: usize, tab_width: u16) -> Grapheme<'a> { + match g { + g if g == "\t" => Grapheme::Tab { + width: tab_width_at(visual_x, tab_width), + }, + _ if LineEnding::from_str(&g).is_some() => Grapheme::Newline, + _ => Grapheme::Other { g }, + } + } + + pub fn change_position(&mut self, visual_x: usize, tab_width: u16) { + if let Grapheme::Tab { width } = self { + *width = tab_width_at(visual_x, tab_width) + } + } + + /// Returns the a visual width of this grapheme, + #[inline] + pub fn width(&self) -> usize { + match *self { + // width is not cached because we are dealing with + // ASCII almost all the time which already has a fastpath + // it's okay to convert to u16 here because no codepoint has a width larger + // than 2 and graphemes are usually atmost two visible codepoints wide + Grapheme::Other { ref g } => grapheme_width(g), + Grapheme::Tab { width } => width, + Grapheme::Newline => 1, + } + } + + pub fn is_whitespace(&self) -> bool { + !matches!(&self, Grapheme::Other { g } if !g.chars().all(char_is_whitespace)) + } + + // TODO currently word boundaries are used for softwrapping. + // This works best for programming languages and well for prose. + // This could however be improved in the future by considering unicode + // character classes but + pub fn is_word_boundary(&self) -> bool { + !matches!(&self, Grapheme::Other { g,.. } if g.chars().all(char_is_word)) + } +} + +impl Display for Grapheme<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + Grapheme::Newline => write!(f, " "), + Grapheme::Tab { width } => { + for _ in 0..width { + write!(f, " ")?; + } + Ok(()) + } + Grapheme::Other { ref g } => { + write!(f, "{g}") + } + } + } +} #[must_use] pub fn grapheme_width(g: &str) -> usize { @@ -27,6 +108,8 @@ pub fn grapheme_width(g: &str) -> usize { // We use max(1) here because all grapeheme clusters--even illformed // ones--should have at least some width so they can be edited // properly. + // TODO properly handle unicode width for all codepoints + // example of where unicode width is currently wrong: 🤦🏼♂️ (taken from https://hsivonen.fi/string-length/) UnicodeWidthStr::width(g).max(1) } } @@ -341,3 +424,101 @@ impl<'a> Iterator for RopeGraphemes<'a> { } } } + +/// A highly compressed Cow<'a, str> that holds +/// atmost u31::MAX bytes and is readonly +pub struct GraphemeStr<'a> { + ptr: NonNull<u8>, + len: u32, + phantom: PhantomData<&'a str>, +} + +impl GraphemeStr<'_> { + const MASK_OWNED: u32 = 1 << 31; + + fn compute_len(&self) -> usize { + (self.len & !Self::MASK_OWNED) as usize + } +} + +impl Deref for GraphemeStr<'_> { + type Target = str; + fn deref(&self) -> &Self::Target { + unsafe { + let bytes = slice::from_raw_parts(self.ptr.as_ptr(), self.compute_len()); + str::from_utf8_unchecked(bytes) + } + } +} + +impl Drop for GraphemeStr<'_> { + fn drop(&mut self) { + if self.len & Self::MASK_OWNED != 0 { + // free allocation + unsafe { + drop(Box::from_raw(slice::from_raw_parts_mut( + self.ptr.as_ptr(), + self.compute_len(), + ))); + } + } + } +} + +impl<'a> From<&'a str> for GraphemeStr<'a> { + fn from(g: &'a str) -> Self { + GraphemeStr { + ptr: unsafe { NonNull::new_unchecked(g.as_bytes().as_ptr() as *mut u8) }, + len: i32::try_from(g.len()).unwrap() as u32, + phantom: PhantomData, + } + } +} + +impl<'a> From<String> for GraphemeStr<'a> { + fn from(g: String) -> Self { + let len = g.len(); + let ptr = Box::into_raw(g.into_bytes().into_boxed_slice()) as *mut u8; + GraphemeStr { + ptr: unsafe { NonNull::new_unchecked(ptr) }, + len: i32::try_from(len).unwrap() as u32, + phantom: PhantomData, + } + } +} + +impl<'a> From<Cow<'a, str>> for GraphemeStr<'a> { + fn from(g: Cow<'a, str>) -> Self { + match g { + Cow::Borrowed(g) => g.into(), + Cow::Owned(g) => g.into(), + } + } +} + +impl<T: Deref<Target = str>> PartialEq<T> for GraphemeStr<'_> { + fn eq(&self, other: &T) -> bool { + self.deref() == other.deref() + } +} +impl PartialEq<str> for GraphemeStr<'_> { + fn eq(&self, other: &str) -> bool { + self.deref() == other + } +} +impl Eq for GraphemeStr<'_> {} +impl Debug for GraphemeStr<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Debug::fmt(self.deref(), f) + } +} +impl Display for GraphemeStr<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Display::fmt(self.deref(), f) + } +} +impl Clone for GraphemeStr<'_> { + fn clone(&self) -> Self { + self.deref().to_owned().into() + } +} |