From f7bd7b5eafac9f016a470cc77c780922f83e690b Mon Sep 17 00:00:00 2001 From: William Etheredge Date: Fri, 3 Feb 2023 08:24:46 -0600 Subject: Add :character-info command (#4000) --- helix-term/src/commands/typed.rs | 133 ++++++++++++++++++++++++++++++++++++++ helix-term/tests/test/commands.rs | 58 +++++++++++++++++ 2 files changed, 191 insertions(+) (limited to 'helix-term') diff --git a/helix-term/src/commands/typed.rs b/helix-term/src/commands/typed.rs index f2495d8c..1fd11b65 100644 --- a/helix-term/src/commands/typed.rs +++ b/helix-term/src/commands/typed.rs @@ -4,6 +4,7 @@ use crate::job::Job; use super::*; +use helix_core::encoding; use helix_view::editor::{Action, CloseError, ConfigEvent}; use ui::completers::{self, Completer}; @@ -1033,6 +1034,131 @@ fn set_encoding( } } +/// Shows info about the character under the primary cursor. +fn get_character_info( + cx: &mut compositor::Context, + _args: &[Cow], + event: PromptEvent, +) -> anyhow::Result<()> { + if event != PromptEvent::Validate { + return Ok(()); + } + + let (view, doc) = current_ref!(cx.editor); + let text = doc.text().slice(..); + + let grapheme_start = doc.selection(view.id).primary().cursor(text); + let grapheme_end = graphemes::next_grapheme_boundary(text, grapheme_start); + + if grapheme_start == grapheme_end { + return Ok(()); + } + + let grapheme = text.slice(grapheme_start..grapheme_end).to_string(); + let encoding = doc.encoding(); + + let printable = grapheme.chars().fold(String::new(), |mut s, c| { + match c { + '\0' => s.push_str("\\0"), + '\t' => s.push_str("\\t"), + '\n' => s.push_str("\\n"), + '\r' => s.push_str("\\r"), + _ => s.push(c), + } + + s + }); + + // Convert to Unicode codepoints if in UTF-8 + let unicode = if encoding == encoding::UTF_8 { + let mut unicode = " (".to_owned(); + + for (i, char) in grapheme.chars().enumerate() { + if i != 0 { + unicode.push(' '); + } + + unicode.push_str("U+"); + + let codepoint: u32 = if char.is_ascii() { + char.into() + } else { + // Not ascii means it will be multi-byte, so strip out the extra + // bits that encode the length & mark continuation bytes + + let s = String::from(char); + let bytes = s.as_bytes(); + + // First byte starts with 2-4 ones then a zero, so strip those off + let first = bytes[0]; + let codepoint = first & (0xFF >> (first.leading_ones() + 1)); + let mut codepoint = u32::from(codepoint); + + // Following bytes start with 10 + for byte in bytes.iter().skip(1) { + codepoint <<= 6; + codepoint += u32::from(*byte) & 0x3F; + } + + codepoint + }; + + unicode.push_str(&format!("{codepoint:0>4x}")); + } + + unicode.push(')'); + unicode + } else { + String::new() + }; + + // Give the decimal value for ascii characters + let dec = if encoding.is_ascii_compatible() && grapheme.len() == 1 { + format!(" Dec {}", grapheme.as_bytes()[0]) + } else { + String::new() + }; + + let hex = { + let mut encoder = encoding.new_encoder(); + let max_encoded_len = encoder + .max_buffer_length_from_utf8_without_replacement(grapheme.len()) + .unwrap(); + let mut bytes = Vec::with_capacity(max_encoded_len); + let mut current_byte = 0; + let mut hex = String::new(); + + for (i, char) in grapheme.chars().enumerate() { + if i != 0 { + hex.push_str(" +"); + } + + let (result, _input_bytes_read) = encoder.encode_from_utf8_to_vec_without_replacement( + &char.to_string(), + &mut bytes, + true, + ); + + if let encoding::EncoderResult::Unmappable(char) = result { + bail!("{char:?} cannot be mapped to {}", encoding.name()); + } + + for byte in &bytes[current_byte..] { + hex.push_str(&format!(" {byte:0>2x}")); + } + + current_byte = bytes.len(); + } + + hex + }; + + cx.editor + .set_status(format!("\"{printable}\"{unicode}{dec} Hex{hex}")); + + Ok(()) +} + /// Reload the [`Document`] from its source file. fn reload( cx: &mut compositor::Context, @@ -2131,6 +2257,13 @@ pub const TYPABLE_COMMAND_LIST: &[TypableCommand] = &[ fun: set_encoding, completer: None, }, + TypableCommand { + name: "character-info", + aliases: &["char"], + doc: "Get info about the character under the primary cursor.", + fun: get_character_info, + completer: None, + }, TypableCommand { name: "reload", aliases: &[], diff --git a/helix-term/tests/test/commands.rs b/helix-term/tests/test/commands.rs index 6e7275f5..da2e020e 100644 --- a/helix-term/tests/test/commands.rs +++ b/helix-term/tests/test/commands.rs @@ -354,3 +354,61 @@ async fn test_extend_line() -> anyhow::Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread")] +async fn test_character_info() -> anyhow::Result<()> { + // UTF-8, single byte + test_key_sequence( + &mut helpers::AppBuilder::new().build()?, + Some("ihh:char"), + Some(&|app| { + assert_eq!( + r#""h" (U+0068) Dec 104 Hex 68"#, + app.editor.get_status().unwrap().0 + ); + }), + false, + ) + .await?; + + // UTF-8, multi-byte + test_key_sequence( + &mut helpers::AppBuilder::new().build()?, + Some("iëh:char"), + Some(&|app| { + assert_eq!( + r#""ë" (U+0065 U+0308) Hex 65 + cc 88"#, + app.editor.get_status().unwrap().0 + ); + }), + false, + ) + .await?; + + // Multiple characters displayed as one, escaped characters + test_key_sequence( + &mut helpers::AppBuilder::new().build()?, + Some(":lineending crlf:char"), + Some(&|app| { + assert_eq!( + r#""\r\n" (U+000d U+000a) Hex 0d + 0a"#, + app.editor.get_status().unwrap().0 + ); + }), + false, + ) + .await?; + + // Non-UTF-8 + test_key_sequence( + &mut helpers::AppBuilder::new().build()?, + Some(":encoding asciiihh:char"), + Some(&|app| { + assert_eq!(r#""h" Dec 104 Hex 68"#, app.editor.get_status().unwrap().0); + }), + false, + ) + .await?; + + Ok(()) +} -- cgit v1.2.3-70-g09d2