aboutsummaryrefslogtreecommitdiff
path: root/helix-term
diff options
context:
space:
mode:
author: William Etheredge, 2023-02-03 14:24:46 +0000
committer: GitHub, 2023-02-03 14:24:46 +0000
commit: f7bd7b5eafac9f016a470cc77c780922f83e690b (patch)
tree: 84e5cd9d28377c14b208bab68cc5487b1289d506 /helix-term
parent: d8f482e11e2dec544ede636464cf6a87e32bd1f7 (diff)
Add :character-info command (#4000)
Diffstat (limited to 'helix-term')
-rw-r--r-- helix-term/src/commands/typed.rs 133
-rw-r--r-- helix-term/tests/test/commands.rs 58
2 files changed, 191 insertions, 0 deletions
diff --git a/helix-term/src/commands/typed.rs b/helix-term/src/commands/typed.rs
index f2495d8c..1fd11b65 100644
--- a/helix-term/src/commands/typed.rs
+++ b/helix-term/src/commands/typed.rs
@@ -4,6 +4,7 @@ use crate::job::Job;
use super::*;
+use helix_core::encoding;
use helix_view::editor::{Action, CloseError, ConfigEvent};
use ui::completers::{self, Completer};
@@ -1033,6 +1034,131 @@ fn set_encoding(
}
}
+/// Shows info about the character under the primary cursor.
+///
+/// The status line is set to `"<grapheme>"<unicode><dec> Hex<bytes>`, where:
+///
+/// - `<grapheme>` is the grapheme at the cursor with `\0`, `\t`, `\n` and
+///   `\r` written as escape sequences,
+/// - `<unicode>` lists the codepoints as ` (U+XXXX U+YYYY ...)` and is only
+///   present when the document's encoding is UTF-8,
+/// - `<dec>` is ` Dec N` and is only present when the encoding is ASCII
+///   compatible and the grapheme is a single UTF-8 byte,
+/// - `<bytes>` are the bytes produced by the document encoding's encoder, in
+///   hex, with the bytes of consecutive `char`s separated by ` +`.
+///
+/// Nothing happens unless `event` is [`PromptEvent::Validate`], or when the
+/// grapheme at the cursor is empty.
+///
+/// # Errors
+///
+/// Fails if a `char` of the grapheme cannot be mapped to the document's
+/// encoding.
+fn get_character_info(
+    cx: &mut compositor::Context,
+    _args: &[Cow<str>],
+    event: PromptEvent,
+) -> anyhow::Result<()> {
+    if event != PromptEvent::Validate {
+        return Ok(());
+    }
+
+    let (view, doc) = current_ref!(cx.editor);
+    let text = doc.text().slice(..);
+
+    let grapheme_start = doc.selection(view.id).primary().cursor(text);
+    let grapheme_end = graphemes::next_grapheme_boundary(text, grapheme_start);
+
+    // No grapheme follows the cursor, so there is nothing to describe.
+    if grapheme_start == grapheme_end {
+        return Ok(());
+    }
+
+    let grapheme = text.slice(grapheme_start..grapheme_end).to_string();
+    let encoding = doc.encoding();
+
+    // Escape the control characters that would otherwise garble the status line.
+    let printable = grapheme.chars().fold(String::new(), |mut s, c| {
+        match c {
+            '\0' => s.push_str("\\0"),
+            '\t' => s.push_str("\\t"),
+            '\n' => s.push_str("\\n"),
+            '\r' => s.push_str("\\r"),
+            _ => s.push(c),
+        }
+
+        s
+    });
+
+    // Convert to Unicode codepoints if in UTF-8
+    let unicode = if encoding == encoding::UTF_8 {
+        let mut unicode = " (".to_owned();
+
+        for (i, char) in grapheme.chars().enumerate() {
+            if i != 0 {
+                unicode.push(' ');
+            }
+
+            // A `char` is a Unicode scalar value, so converting it to `u32`
+            // yields the codepoint directly; there is no need to decode the
+            // UTF-8 bytes by hand.
+            let codepoint = u32::from(char);
+
+            unicode.push_str(&format!("U+{codepoint:0>4x}"));
+        }
+
+        unicode.push(')');
+        unicode
+    } else {
+        String::new()
+    };
+
+    // Give the decimal value for ascii characters
+    let dec = if encoding.is_ascii_compatible() && grapheme.len() == 1 {
+        format!(" Dec {}", grapheme.as_bytes()[0])
+    } else {
+        String::new()
+    };
+
+    let hex = {
+        let mut encoder = encoding.new_encoder();
+        // NOTE(review): the encoder appears to write into the vector's spare
+        // capacity rather than growing it, so the worst-case size is reserved
+        // up front; confirm against the encoding_rs documentation.
+        let max_encoded_len = encoder
+            .max_buffer_length_from_utf8_without_replacement(grapheme.len())
+            .unwrap();
+        let mut bytes = Vec::with_capacity(max_encoded_len);
+        // Index of the first byte in `bytes` that belongs to the current `char`.
+        let mut current_byte = 0;
+        let mut hex = String::new();
+
+        for (i, char) in grapheme.chars().enumerate() {
+            if i != 0 {
+                hex.push_str(" +");
+            }
+
+            let (result, _input_bytes_read) = encoder.encode_from_utf8_to_vec_without_replacement(
+                &char.to_string(),
+                &mut bytes,
+                true,
+            );
+
+            if let encoding::EncoderResult::Unmappable(char) = result {
+                bail!("{char:?} cannot be mapped to {}", encoding.name());
+            }
+
+            // Only print the bytes appended for this `char`.
+            for byte in &bytes[current_byte..] {
+                hex.push_str(&format!(" {byte:0>2x}"));
+            }
+
+            current_byte = bytes.len();
+        }
+
+        hex
+    };
+
+    cx.editor
+        .set_status(format!("\"{printable}\"{unicode}{dec} Hex{hex}"));
+
+    Ok(())
+}
+
+
/// Reload the [`Document`] from its source file.
fn reload(
cx: &mut compositor::Context,
@@ -2132,6 +2258,13 @@ pub const TYPABLE_COMMAND_LIST: &[TypableCommand] = &[
completer: None,
},
TypableCommand {
+ name: "character-info",
+ aliases: &["char"],
+ doc: "Get info about the character under the primary cursor.",
+ fun: get_character_info,
+ completer: None,
+ },
+ TypableCommand {
name: "reload",
aliases: &[],
doc: "Discard changes and reload from the source file.",
diff --git a/helix-term/tests/test/commands.rs b/helix-term/tests/test/commands.rs
index 6e7275f5..da2e020e 100644
--- a/helix-term/tests/test/commands.rs
+++ b/helix-term/tests/test/commands.rs
@@ -354,3 +354,61 @@ async fn test_extend_line() -> anyhow::Result<()> {
Ok(())
}
+
+/// Runs `:char` in a fresh application for each scenario and checks the
+/// resulting status message.
+#[tokio::test(flavor = "multi_thread")]
+async fn test_character_info() -> anyhow::Result<()> {
+    // Each entry is (key sequence to feed, status message expected afterwards).
+    let scenarios = [
+        // UTF-8, single byte
+        ("ih<esc>h:char<ret>", r#""h" (U+0068) Dec 104 Hex 68"#),
+        // UTF-8, multi-byte: an `e` followed by a combining diaeresis, spelled
+        // with an escape so the decomposed form cannot be normalised away.
+        (
+            "ie\u{0308}<esc>h:char<ret>",
+            "\"e\u{0308}\" (U+0065 U+0308) Hex 65 + cc 88",
+        ),
+        // Multiple characters displayed as one, escaped characters
+        (
+            ":line<minus>ending crlf<ret>:char<ret>",
+            r#""\r\n" (U+000d U+000a) Hex 0d + 0a"#,
+        ),
+        // Non-UTF-8
+        (
+            ":encoding ascii<ret>ih<esc>h:char<ret>",
+            r#""h" Dec 104 Hex 68"#,
+        ),
+    ];
+
+    for (keys, expected_status) in scenarios {
+        // Every scenario starts from a freshly built application.
+        let mut app = helpers::AppBuilder::new().build()?;
+
+        test_key_sequence(
+            &mut app,
+            Some(keys),
+            Some(&|app| {
+                assert_eq!(expected_status, app.editor.get_status().unwrap().0);
+            }),
+            false,
+        )
+        .await?;
+    }
+
+    Ok(())
+}