aboutsummaryrefslogtreecommitdiff
path: root/helix-view/src/document.rs
diff options
context:
space:
mode:
Diffstat (limited to 'helix-view/src/document.rs')
-rw-r--r--helix-view/src/document.rs105
1 files changed, 79 insertions, 26 deletions
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index afcd3bff..d78d30d8 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -397,33 +397,11 @@ pub fn from_reader<R: std::io::Read + ?Sized>(
let mut buf_out = [0u8; BUF_SIZE];
let mut builder = RopeBuilder::new();
- // By default, the encoding of the text is auto-detected by
- // `encoding_rs` for_bom, and if it fails, from `chardetng`
- // crate which requires sample data from the reader.
- // As a manual override to this auto-detection is possible, the
- // same data is read into `buf` to ensure symmetry in the upcoming
- // loop.
- let (encoding, has_bom, mut decoder, mut slice, mut is_empty) = {
- let read = reader.read(&mut buf)?;
- let is_empty = read == 0;
- let (encoding, has_bom) = encoding
- .map(|encoding| (encoding, false))
- .or_else(|| {
- encoding::Encoding::for_bom(&buf).map(|(encoding, _bom_size)| (encoding, true))
- })
- .unwrap_or_else(|| {
- let mut encoding_detector = chardetng::EncodingDetector::new();
- encoding_detector.feed(&buf, is_empty);
- (encoding_detector.guess(None, true), false)
- });
-
- let decoder = encoding.new_decoder();
+ let (encoding, has_bom, mut decoder, read) =
+ read_and_detect_encoding(reader, encoding, &mut buf)?;
- // If the amount of bytes read from the reader is less than
- // `buf.len()`, it is undesirable to read the bytes afterwards.
- let slice = &buf[..read];
- (encoding, has_bom, decoder, slice, is_empty)
- };
+ let mut slice = &buf[..read];
+ let mut is_empty = read == 0;
// `RopeBuilder::append()` expects a `&str`, so this is the "real"
// output buffer. When decoding, the number of bytes in the output
@@ -493,6 +471,81 @@ pub fn from_reader<R: std::io::Read + ?Sized>(
Ok((rope, encoding, has_bom))
}
+/// Reads everything from `reader`, decodes it, and returns the text as a `String`.
+///
+/// When `encoding` is `None`, the encoding is chosen from the first chunk by
+/// `read_and_detect_encoding()`; otherwise the given encoding is used.
+///
+/// Returns the decoded text, the encoding that was used, and whether that
+/// encoding was determined from a BOM.
+///
+/// # Errors
+///
+/// Returns an error if reading from `reader` fails.
+pub fn read_to_string<R: std::io::Read + ?Sized>(
+    reader: &mut R,
+    encoding: Option<&'static Encoding>,
+) -> Result<(String, &'static Encoding, bool), Error> {
+    let mut buf = [0u8; BUF_SIZE];
+
+    let (encoding, has_bom, mut decoder, read) =
+        read_and_detect_encoding(reader, encoding, &mut buf)?;
+
+    // `slice` is the part of `buf` filled by the most recent read, and
+    // `is_empty` records whether that read returned zero bytes.
+    let mut slice = &buf[..read];
+    let mut is_empty = read == 0;
+    let mut buf_string = String::with_capacity(buf.len());
+
+    loop {
+        let mut total_read = 0usize;
+
+        // Decode the current chunk, growing the output string whenever the
+        // decoder reports that it ran out of room.
+        loop {
+            // `is_empty` doubles as the decoder's "last input" flag, so the
+            // decoder is told about the end of the stream on the empty read.
+            let (result, read, ..) =
+                decoder.decode_to_string(&slice[total_read..], &mut buf_string, is_empty);
+
+            total_read += read;
+
+            match result {
+                encoding::CoderResult::InputEmpty => {
+                    debug_assert_eq!(slice.len(), total_read);
+                    break;
+                }
+                encoding::CoderResult::OutputFull => {
+                    debug_assert!(slice.len() > total_read);
+                    buf_string.reserve(buf.len())
+                }
+            }
+        }
+
+        // The empty read has been handed to the decoder, so decoding is done.
+        // No further read is issued here: doing I/O inside a debug-only
+        // assertion would make debug and release builds behave differently.
+        if is_empty {
+            break;
+        }
+
+        let read = reader.read(&mut buf)?;
+        slice = &buf[..read];
+        is_empty = read == 0;
+    }
+    Ok((buf_string, encoding, has_bom))
+}
+
+/// Reads the first chunk from `reader` into `buf` and picks the encoding to
+/// decode it with.
+///
+/// The encoding is chosen in this order:
+/// 1. the caller-supplied `encoding`, if any (always reported without a BOM);
+/// 2. the encoding indicated by a BOM at the start of the chunk, via
+///    `Encoding::for_bom`;
+/// 3. the guess made by `chardetng` from the bytes of the chunk.
+///
+/// The chunk is read into `buf` even when `encoding` is given, so callers can
+/// continue from `buf[..read]` the same way in every case.
+///
+/// Returns the chosen encoding, whether it was determined from a BOM, a new
+/// decoder for that encoding, and the number of bytes read into `buf`.
+///
+/// # Errors
+///
+/// Returns an error if reading from `reader` fails.
+fn read_and_detect_encoding<R: std::io::Read + ?Sized>(
+    reader: &mut R,
+    encoding: Option<&'static Encoding>,
+    buf: &mut [u8],
+) -> Result<(&'static Encoding, bool, encoding::Decoder, usize), Error> {
+    let read = reader.read(buf)?;
+    let is_empty = read == 0;
+    // Only the bytes that were just read take part in detection; anything past
+    // `read` is whatever the caller's buffer held before this call.
+    let sample = &buf[..read];
+    let (encoding, has_bom) = encoding
+        .map(|encoding| (encoding, false))
+        .or_else(|| {
+            encoding::Encoding::for_bom(sample).map(|(encoding, _bom_size)| (encoding, true))
+        })
+        .unwrap_or_else(|| {
+            let mut encoding_detector = chardetng::EncodingDetector::new();
+            encoding_detector.feed(sample, is_empty);
+            (encoding_detector.guess(None, true), false)
+        });
+    let decoder = encoding.new_decoder();
+
+    Ok((encoding, has_bom, decoder, read))
+}
+
// The documentation and implementation of this function should be up-to-date with
// its sibling function, `from_reader()`.
//