From 636c91c76b2855a4ac58b3a030a9e45f88eb7502 Mon Sep 17 00:00:00 2001
From: Michael Davis
Date: Mon, 26 Jun 2023 10:17:04 -0500
Subject: Mark buffers created from stdin as modified (#7431)

This resolves some confusing behavior where a scratch document created by piping into hx is discarded when navigating away from that document. We discard any scratch documents that are not modified and the original `Editor::new_file_from_stdin` would create unmodified documents. We refactor this function to create an empty document first and then to apply the text from stdin as a change.
---
 helix-view/src/document.rs | 105 ++++++++++++++++++++++++++++++++++-----------
 helix-view/src/editor.rs | 21 ++++++---
 2 files changed, 95 insertions(+), 31 deletions(-)

(limited to 'helix-view/src')

diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index afcd3bff..d78d30d8 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -397,33 +397,11 @@ pub fn from_reader( let mut buf_out = [0u8; BUF_SIZE]; let mut builder = RopeBuilder::new(); - // By default, the encoding of the text is auto-detected by - // `encoding_rs` for_bom, and if it fails, from `chardetng` - // crate which requires sample data from the reader. - // As a manual override to this auto-detection is possible, the - // same data is read into `buf` to ensure symmetry in the upcoming - // loop. 
- let (encoding, has_bom, mut decoder, mut slice, mut is_empty) = { - let read = reader.read(&mut buf)?; - let is_empty = read == 0; - let (encoding, has_bom) = encoding - .map(|encoding| (encoding, false)) - .or_else(|| { - encoding::Encoding::for_bom(&buf).map(|(encoding, _bom_size)| (encoding, true)) - }) - .unwrap_or_else(|| { - let mut encoding_detector = chardetng::EncodingDetector::new(); - encoding_detector.feed(&buf, is_empty); - (encoding_detector.guess(None, true), false) - }); - - let decoder = encoding.new_decoder(); + let (encoding, has_bom, mut decoder, read) = + read_and_detect_encoding(reader, encoding, &mut buf)?; - // If the amount of bytes read from the reader is less than - // `buf.len()`, it is undesirable to read the bytes afterwards. - let slice = &buf[..read]; - (encoding, has_bom, decoder, slice, is_empty) - }; + let mut slice = &buf[..read]; + let mut is_empty = read == 0; // `RopeBuilder::append()` expects a `&str`, so this is the "real" // output buffer. When decoding, the number of bytes in the output @@ -493,6 +471,81 @@ pub fn from_reader( Ok((rope, encoding, has_bom)) } +pub fn read_to_string( + reader: &mut R, + encoding: Option<&'static Encoding>, +) -> Result<(String, &'static Encoding, bool), Error> { + let mut buf = [0u8; BUF_SIZE]; + + let (encoding, has_bom, mut decoder, read) = + read_and_detect_encoding(reader, encoding, &mut buf)?; + + let mut slice = &buf[..read]; + let mut is_empty = read == 0; + let mut buf_string = String::with_capacity(buf.len()); + + loop { + let mut total_read = 0usize; + + loop { + let (result, read, ..) 
= + decoder.decode_to_string(&slice[total_read..], &mut buf_string, is_empty); + + total_read += read; + + match result { + encoding::CoderResult::InputEmpty => { + debug_assert_eq!(slice.len(), total_read); + break; + } + encoding::CoderResult::OutputFull => { + debug_assert!(slice.len() > total_read); + buf_string.reserve(buf.len()) + } + } + } + + if is_empty { + debug_assert_eq!(reader.read(&mut buf)?, 0); + break; + } + + let read = reader.read(&mut buf)?; + slice = &buf[..read]; + is_empty = read == 0; + } + Ok((buf_string, encoding, has_bom)) +} + +/// Reads the first chunk from a Reader into the given buffer +/// and detects the encoding. +/// +/// By default, the encoding of the text is auto-detected by +/// `encoding_rs` for_bom, and if it fails, from `chardetng` +/// crate which requires sample data from the reader. +/// As a manual override to this auto-detection is possible, the +/// same data is read into `buf` to ensure symmetry in the upcoming +/// loop. +fn read_and_detect_encoding( + reader: &mut R, + encoding: Option<&'static Encoding>, + buf: &mut [u8], +) -> Result<(&'static Encoding, bool, encoding::Decoder, usize), Error> { + let read = reader.read(buf)?; + let is_empty = read == 0; + let (encoding, has_bom) = encoding + .map(|encoding| (encoding, false)) + .or_else(|| encoding::Encoding::for_bom(buf).map(|(encoding, _bom_size)| (encoding, true))) + .unwrap_or_else(|| { + let mut encoding_detector = chardetng::EncodingDetector::new(); + encoding_detector.feed(buf, is_empty); + (encoding_detector.guess(None, true), false) + }); + let decoder = encoding.new_decoder(); + + Ok((encoding, has_bom, decoder, read)) +} + // The documentation and implementation of this function should be up-to-date with // its sibling function, `from_reader()`. 
// diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index 1a884c32..61d148d3 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -1386,11 +1386,22 @@ impl Editor { } pub fn new_file_from_stdin(&mut self, action: Action) -> Result { - let (rope, encoding, has_bom) = crate::document::from_reader(&mut stdin(), None)?; - Ok(self.new_file_from_document( - action, - Document::from(rope, Some((encoding, has_bom)), self.config.clone()), - )) + let (stdin, encoding, has_bom) = crate::document::read_to_string(&mut stdin(), None)?; + let doc = Document::from( + helix_core::Rope::default(), + Some((encoding, has_bom)), + self.config.clone(), + ); + let doc_id = self.new_file_from_document(action, doc); + let doc = doc_mut!(self, &doc_id); + let view = view_mut!(self); + doc.ensure_view_init(view.id); + let transaction = + helix_core::Transaction::insert(doc.text(), doc.selection(view.id), stdin.into()) + .with_selection(Selection::point(0)); + doc.apply(&transaction, view.id); + doc.append_changes_to_history(view); + Ok(doc_id) } // ??? possible use for integration tests -- cgit v1.2.3-70-g09d2