diff options
author | William Etheredge | 2023-02-03 14:24:46 +0000 |
---|---|---|
committer | GitHub | 2023-02-03 14:24:46 +0000 |
commit | f7bd7b5eafac9f016a470cc77c780922f83e690b (patch) | |
tree | 84e5cd9d28377c14b208bab68cc5487b1289d506 | |
parent | d8f482e11e2dec544ede636464cf6a87e32bd1f7 (diff) |
Add :character-info command (#4000)
-rw-r--r-- | book/src/generated/typable-cmd.md | 1 | ||||
-rw-r--r-- | helix-term/src/commands/typed.rs | 133 | ||||
-rw-r--r-- | helix-term/tests/test/commands.rs | 58 |
3 files changed, 192 insertions, 0 deletions
diff --git a/book/src/generated/typable-cmd.md b/book/src/generated/typable-cmd.md index 66e6ac03..0ff501a3 100644 --- a/book/src/generated/typable-cmd.md +++ b/book/src/generated/typable-cmd.md @@ -43,6 +43,7 @@ | `:change-current-directory`, `:cd` | Change the current working directory. | | `:show-directory`, `:pwd` | Show the current working directory. | | `:encoding` | Set encoding. Based on `https://encoding.spec.whatwg.org`. | +| `:character-info`, `:char` | Get info about the character under the primary cursor. | | `:reload` | Discard changes and reload from the source file. | | `:reload-all` | Discard changes and reload all documents from the source files. | | `:update` | Write changes only if the file has been modified. | diff --git a/helix-term/src/commands/typed.rs b/helix-term/src/commands/typed.rs index f2495d8c..1fd11b65 100644 --- a/helix-term/src/commands/typed.rs +++ b/helix-term/src/commands/typed.rs @@ -4,6 +4,7 @@ use crate::job::Job; use super::*; +use helix_core::encoding; use helix_view::editor::{Action, CloseError, ConfigEvent}; use ui::completers::{self, Completer}; @@ -1033,6 +1034,131 @@ fn set_encoding( } } +/// Shows info about the character under the primary cursor. +fn get_character_info( + cx: &mut compositor::Context, + _args: &[Cow<str>], + event: PromptEvent, +) -> anyhow::Result<()> { + if event != PromptEvent::Validate { + return Ok(()); + } + + let (view, doc) = current_ref!(cx.editor); + let text = doc.text().slice(..); + + let grapheme_start = doc.selection(view.id).primary().cursor(text); + let grapheme_end = graphemes::next_grapheme_boundary(text, grapheme_start); + + if grapheme_start == grapheme_end { + return Ok(()); + } + + let grapheme = text.slice(grapheme_start..grapheme_end).to_string(); + let encoding = doc.encoding(); + + let printable = grapheme.chars().fold(String::new(), |mut s, c| { + match c { + '\0' => s.push_str("\\0"), + '\t' => s.push_str("\\t"), + '\n' => s.push_str("\\n"), + '\r' => s.push_str("\\r"), + _ => s.push(c), + } + + s + }); + + // Convert to Unicode codepoints if in UTF-8 + let unicode = if encoding == encoding::UTF_8 { + let mut unicode = " (".to_owned(); + + for (i, char) in grapheme.chars().enumerate() { + if i != 0 { + unicode.push(' '); + } + + unicode.push_str("U+"); + + let codepoint: u32 = if char.is_ascii() { + char.into() + } else { + // Not ascii means it will be multi-byte, so strip out the extra + // bits that encode the length & mark continuation bytes + + let s = String::from(char); + let bytes = s.as_bytes(); + + // First byte starts with 2-4 ones then a zero, so strip those off + let first = bytes[0]; + let codepoint = first & (0xFF >> (first.leading_ones() + 1)); + let mut codepoint = u32::from(codepoint); + + // Following bytes start with 10 + for byte in bytes.iter().skip(1) { + codepoint <<= 6; + codepoint += u32::from(*byte) & 0x3F; + } + + codepoint + }; + + unicode.push_str(&format!("{codepoint:0>4x}")); + } + + unicode.push(')'); + unicode + } else { + String::new() + }; + + // Give the decimal value for ascii characters + let dec = if encoding.is_ascii_compatible() && grapheme.len() == 1 { + format!(" Dec {}", grapheme.as_bytes()[0]) + } else { + String::new() + }; + + let hex = { + let mut encoder = encoding.new_encoder(); + let max_encoded_len = encoder + .max_buffer_length_from_utf8_without_replacement(grapheme.len()) + .unwrap(); + let mut bytes = Vec::with_capacity(max_encoded_len); + let mut current_byte = 0; + let mut hex = String::new(); + + for (i, char) in grapheme.chars().enumerate() { + if i != 0 { + hex.push_str(" +"); + } + + let (result, _input_bytes_read) = encoder.encode_from_utf8_to_vec_without_replacement( + &char.to_string(), + &mut bytes, + true, + ); + + if let encoding::EncoderResult::Unmappable(char) = result { + bail!("{char:?} cannot be mapped to {}", encoding.name()); + } + + for byte in &bytes[current_byte..] { + hex.push_str(&format!(" {byte:0>2x}")); + } + + current_byte = bytes.len(); + } + + hex + }; + + cx.editor + .set_status(format!("\"{printable}\"{unicode}{dec} Hex{hex}")); + + Ok(()) +} + /// Reload the [`Document`] from its source file. fn reload( cx: &mut compositor::Context, @@ -2132,6 +2258,13 @@ pub const TYPABLE_COMMAND_LIST: &[TypableCommand] = &[ completer: None, }, TypableCommand { + name: "character-info", + aliases: &["char"], + doc: "Get info about the character under the primary cursor.", + fun: get_character_info, + completer: None, + }, + TypableCommand { name: "reload", aliases: &[], doc: "Discard changes and reload from the source file.", diff --git a/helix-term/tests/test/commands.rs b/helix-term/tests/test/commands.rs index 6e7275f5..da2e020e 100644 --- a/helix-term/tests/test/commands.rs +++ b/helix-term/tests/test/commands.rs @@ -354,3 +354,61 @@ async fn test_extend_line() -> anyhow::Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread")] +async fn test_character_info() -> anyhow::Result<()> { + // UTF-8, single byte + test_key_sequence( + &mut helpers::AppBuilder::new().build()?, + Some("ih<esc>h:char<ret>"), + Some(&|app| { + assert_eq!( + r#""h" (U+0068) Dec 104 Hex 68"#, + app.editor.get_status().unwrap().0 + ); + }), + false, + ) + .await?; + + // UTF-8, multi-byte + test_key_sequence( + &mut helpers::AppBuilder::new().build()?, + Some("ië<esc>h:char<ret>"), + Some(&|app| { + assert_eq!( + r#""ë" (U+0065 U+0308) Hex 65 + cc 88"#, + app.editor.get_status().unwrap().0 + ); + }), + false, + ) + .await?; + + // Multiple characters displayed as one, escaped characters + test_key_sequence( + &mut helpers::AppBuilder::new().build()?, + Some(":line<minus>ending crlf<ret>:char<ret>"), + Some(&|app| { + assert_eq!( + r#""\r\n" (U+000d U+000a) Hex 0d + 0a"#, + app.editor.get_status().unwrap().0 + ); + }), + false, + ) + .await?; + + // Non-UTF-8 + test_key_sequence( + &mut helpers::AppBuilder::new().build()?, + Some(":encoding ascii<ret>ih<esc>h:char<ret>"), + Some(&|app| { + assert_eq!(r#""h" Dec 104 Hex 68"#, app.editor.get_status().unwrap().0); + }), + false, + ) + .await?; + + Ok(()) +} |