1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
|
use crate::text::StyledGrapheme;
use helix_core::line_ending::str_is_line_ending;
use helix_core::unicode::width::UnicodeWidthStr;
use unicode_segmentation::UnicodeSegmentation;
const NBSP: &str = "\u{00a0}";
/// A state machine to pack styled symbols into lines.
/// Cannot implement it as Iterator since it yields slices of the internal buffer (need streaming
/// iterators for that).
pub trait LineComposer<'a> {
fn next_line(&mut self) -> Option<(&[StyledGrapheme<'a>], u16)>;
}
/// A state machine that wraps lines on word boundaries.
pub struct WordWrapper<'a, 'b> {
symbols: &'b mut dyn Iterator<Item = StyledGrapheme<'a>>,
max_line_width: u16,
current_line: Vec<StyledGrapheme<'a>>,
next_line: Vec<StyledGrapheme<'a>>,
/// Removes the leading whitespace from lines
trim: bool,
}
impl<'a, 'b> WordWrapper<'a, 'b> {
pub fn new(
symbols: &'b mut dyn Iterator<Item = StyledGrapheme<'a>>,
max_line_width: u16,
trim: bool,
) -> WordWrapper<'a, 'b> {
WordWrapper {
symbols,
max_line_width,
current_line: vec![],
next_line: vec![],
trim,
}
}
}
impl<'a, 'b> LineComposer<'a> for WordWrapper<'a, 'b> {
fn next_line(&mut self) -> Option<(&[StyledGrapheme<'a>], u16)> {
if self.max_line_width == 0 {
return None;
}
std::mem::swap(&mut self.current_line, &mut self.next_line);
self.next_line.truncate(0);
let mut current_line_width = self
.current_line
.iter()
.map(|StyledGrapheme { symbol, .. }| symbol.width() as u16)
.sum();
let mut symbols_to_last_word_end: usize = 0;
let mut width_to_last_word_end: u16 = 0;
let mut prev_whitespace = false;
let mut symbols_exhausted = true;
for StyledGrapheme { symbol, style } in &mut self.symbols {
symbols_exhausted = false;
let symbol_whitespace = symbol.chars().all(&char::is_whitespace) && symbol != NBSP;
// Ignore characters wider that the total max width.
if symbol.width() as u16 > self.max_line_width
// Skip leading whitespace when trim is enabled.
|| self.trim && symbol_whitespace && !str_is_line_ending(symbol) && current_line_width == 0
{
continue;
}
// Break on newline and discard it.
if str_is_line_ending(symbol) {
if prev_whitespace {
current_line_width = width_to_last_word_end;
self.current_line.truncate(symbols_to_last_word_end);
}
break;
}
// Mark the previous symbol as word end.
if symbol_whitespace && !prev_whitespace {
symbols_to_last_word_end = self.current_line.len();
width_to_last_word_end = current_line_width;
}
self.current_line.push(StyledGrapheme { symbol, style });
current_line_width += symbol.width() as u16;
if current_line_width > self.max_line_width {
// If there was no word break in the text, wrap at the end of the line.
let (truncate_at, truncated_width) = if symbols_to_last_word_end != 0 {
(symbols_to_last_word_end, width_to_last_word_end)
} else {
(self.current_line.len() - 1, self.max_line_width)
};
// Push the remainder to the next line but strip leading whitespace:
{
let remainder = &self.current_line[truncate_at..];
if let Some(remainder_nonwhite) =
remainder.iter().position(|StyledGrapheme { symbol, .. }| {
!symbol.chars().all(&char::is_whitespace)
})
{
self.next_line
.extend_from_slice(&remainder[remainder_nonwhite..]);
}
}
self.current_line.truncate(truncate_at);
current_line_width = truncated_width;
break;
}
prev_whitespace = symbol_whitespace;
}
// Even if the iterator is exhausted, pass the previous remainder.
if symbols_exhausted && self.current_line.is_empty() {
None
} else {
Some((&self.current_line[..], current_line_width))
}
}
}
/// A state machine that truncates overhanging lines.
pub struct LineTruncator<'a, 'b> {
symbols: &'b mut dyn Iterator<Item = StyledGrapheme<'a>>,
max_line_width: u16,
current_line: Vec<StyledGrapheme<'a>>,
/// Record the offet to skip render
horizontal_offset: u16,
}
impl<'a, 'b> LineTruncator<'a, 'b> {
pub fn new(
symbols: &'b mut dyn Iterator<Item = StyledGrapheme<'a>>,
max_line_width: u16,
) -> LineTruncator<'a, 'b> {
LineTruncator {
symbols,
max_line_width,
horizontal_offset: 0,
current_line: vec![],
}
}
pub fn set_horizontal_offset(&mut self, horizontal_offset: u16) {
self.horizontal_offset = horizontal_offset;
}
}
impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> {
fn next_line(&mut self) -> Option<(&[StyledGrapheme<'a>], u16)> {
if self.max_line_width == 0 {
return None;
}
self.current_line.truncate(0);
let mut current_line_width = 0;
let mut skip_rest = false;
let mut symbols_exhausted = true;
let mut horizontal_offset = self.horizontal_offset as usize;
for StyledGrapheme { symbol, style } in &mut self.symbols {
symbols_exhausted = false;
// Ignore characters wider that the total max width.
if symbol.width() as u16 > self.max_line_width {
continue;
}
// Break on newline and discard it.
if str_is_line_ending(symbol) {
break;
}
if current_line_width + symbol.width() as u16 > self.max_line_width {
// Exhaust the remainder of the line.
skip_rest = true;
break;
}
let symbol = if horizontal_offset == 0 {
symbol
} else {
let w = symbol.width();
if w > horizontal_offset {
let t = trim_offset(symbol, horizontal_offset);
horizontal_offset = 0;
t
} else {
horizontal_offset -= w;
""
}
};
current_line_width += symbol.width() as u16;
self.current_line.push(StyledGrapheme { symbol, style });
}
if skip_rest {
for StyledGrapheme { symbol, .. } in &mut self.symbols {
if str_is_line_ending(symbol) {
break;
}
}
}
if symbols_exhausted && self.current_line.is_empty() {
None
} else {
Some((&self.current_line[..], current_line_width))
}
}
}
/// This function will return a str slice which start at specified offset.
/// As src is a unicode str, start offset has to be calculated with each character.
fn trim_offset(src: &str, mut offset: usize) -> &str {
let mut start = 0;
for c in UnicodeSegmentation::graphemes(src, true) {
let w = c.width();
if w <= offset {
offset -= w;
start += c.len();
} else {
break;
}
}
&src[start..]
}
#[cfg(test)]
mod test {
use super::*;
use unicode_segmentation::UnicodeSegmentation;
enum Composer {
WordWrapper { trim: bool },
LineTruncator,
}
fn run_composer(which: Composer, text: &str, text_area_width: u16) -> (Vec<String>, Vec<u16>) {
let style = Default::default();
let mut styled =
UnicodeSegmentation::graphemes(text, true).map(|g| StyledGrapheme { symbol: g, style });
let mut composer: Box<dyn LineComposer> = match which {
Composer::WordWrapper { trim } => {
Box::new(WordWrapper::new(&mut styled, text_area_width, trim))
}
Composer::LineTruncator => Box::new(LineTruncator::new(&mut styled, text_area_width)),
};
let mut lines = vec![];
let mut widths = vec![];
while let Some((styled, width)) = composer.next_line() {
let line = styled
.iter()
.map(|StyledGrapheme { symbol, .. }| *symbol)
.collect::<String>();
assert!(width <= text_area_width);
lines.push(line);
widths.push(width);
}
(lines, widths)
}
#[test]
fn line_composer_one_line() {
let width = 40;
for i in 1..width {
let text = "a".repeat(i);
let (word_wrapper, _) =
run_composer(Composer::WordWrapper { trim: true }, &text, width as u16);
let (line_truncator, _) = run_composer(Composer::LineTruncator, &text, width as u16);
let expected = vec![text];
assert_eq!(word_wrapper, expected);
assert_eq!(line_truncator, expected);
}
}
#[test]
fn line_composer_short_lines() {
let width = 20;
let text =
"abcdefg\nhijklmno\npabcdefg\nhijklmn\nopabcdefghijk\nlmnopabcd\n\n\nefghijklmno";
let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
let wrapped: Vec<&str> = text.split('\n').collect();
assert_eq!(word_wrapper, wrapped);
assert_eq!(line_truncator, wrapped);
}
#[test]
fn line_composer_long_word() {
let width = 20;
let text = "abcdefghijklmnopabcdefghijklmnopabcdefghijklmnopabcdefghijklmno";
let (word_wrapper, _) =
run_composer(Composer::WordWrapper { trim: true }, text, width as u16);
let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width as u16);
let wrapped = vec![
&text[..width],
&text[width..width * 2],
&text[width * 2..width * 3],
&text[width * 3..],
];
assert_eq!(
word_wrapper, wrapped,
"WordWrapper should detect the line cannot be broken on word boundary and \
break it at line width limit."
);
assert_eq!(line_truncator, vec![&text[..width]]);
}
#[test]
fn line_composer_long_sentence() {
let width = 20;
let text =
"abcd efghij klmnopabcd efgh ijklmnopabcdefg hijkl mnopab c d e f g h i j k l m n o";
let text_multi_space =
"abcd efghij klmnopabcd efgh ijklmnopabcdefg hijkl mnopab c d e f g h i j k l \
m n o";
let (word_wrapper_single_space, _) =
run_composer(Composer::WordWrapper { trim: true }, text, width as u16);
let (word_wrapper_multi_space, _) = run_composer(
Composer::WordWrapper { trim: true },
text_multi_space,
width as u16,
);
let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width as u16);
let word_wrapped = vec![
"abcd efghij",
"klmnopabcd efgh",
"ijklmnopabcdefg",
"hijkl mnopab c d e f",
"g h i j k l m n o",
];
assert_eq!(word_wrapper_single_space, word_wrapped);
assert_eq!(word_wrapper_multi_space, word_wrapped);
assert_eq!(line_truncator, vec![&text[..width]]);
}
#[test]
fn line_composer_zero_width() {
let width = 0;
let text = "abcd efghij klmnopabcd efgh ijklmnopabcdefg hijkl mnopab ";
let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
let expected: Vec<&str> = Vec::new();
assert_eq!(word_wrapper, expected);
assert_eq!(line_truncator, expected);
}
#[test]
fn line_composer_max_line_width_of_1() {
let width = 1;
let text = "abcd efghij klmnopabcd efgh ijklmnopabcdefg hijkl mnopab ";
let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
let expected: Vec<&str> = UnicodeSegmentation::graphemes(text, true)
.filter(|g| g.chars().any(|c| !c.is_whitespace()))
.collect();
assert_eq!(word_wrapper, expected);
assert_eq!(line_truncator, vec!["a"]);
}
#[test]
fn line_composer_max_line_width_of_1_double_width_characters() {
let width = 1;
let text = "コンピュータ上で文字を扱う場合、典型的には文字\naaaによる通信を行う場合にその\
両端点では、";
let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
assert_eq!(word_wrapper, vec!["", "a", "a", "a"]);
assert_eq!(line_truncator, vec!["", "a"]);
}
/// Tests WordWrapper with words some of which exceed line length and some not.
#[test]
fn line_composer_word_wrapper_mixed_length() {
let width = 20;
let text = "abcd efghij klmnopabcdefghijklmnopabcdefghijkl mnopab cdefghi j klmno";
let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
assert_eq!(
word_wrapper,
vec![
"abcd efghij",
"klmnopabcdefghijklmn",
"opabcdefghijkl",
"mnopab cdefghi j",
"klmno",
]
)
}
#[test]
fn line_composer_double_width_chars() {
let width = 20;
let text = "コンピュータ上で文字を扱う場合、典型的には文字による通信を行う場合にその両端点\
では、";
let (word_wrapper, word_wrapper_width) =
run_composer(Composer::WordWrapper { trim: true }, text, width);
let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
assert_eq!(line_truncator, vec!["コンピュータ上で文字"]);
let wrapped = vec![
"コンピュータ上で文字",
"を扱う場合、典型的に",
"は文字による通信を行",
"う場合にその両端点で",
"は、",
];
assert_eq!(word_wrapper, wrapped);
assert_eq!(word_wrapper_width, vec![width, width, width, width, 4]);
}
#[test]
fn line_composer_leading_whitespace_removal() {
let width = 20;
let text = "AAAAAAAAAAAAAAAAAAAA AAA";
let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
assert_eq!(word_wrapper, vec!["AAAAAAAAAAAAAAAAAAAA", "AAA",]);
assert_eq!(line_truncator, vec!["AAAAAAAAAAAAAAAAAAAA"]);
}
/// Tests truncation of leading whitespace.
#[test]
fn line_composer_lots_of_spaces() {
let width = 20;
let text = " ";
let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
assert_eq!(word_wrapper, vec![""]);
assert_eq!(line_truncator, vec![" "]);
}
/// Tests an input starting with a letter, folowed by spaces - some of the behaviour is
/// incidental.
#[test]
fn line_composer_char_plus_lots_of_spaces() {
let width = 20;
let text = "a ";
let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
let (line_truncator, _) = run_composer(Composer::LineTruncator, text, width);
// What's happening below is: the first line gets consumed, trailing spaces discarded,
// after 20 of which a word break occurs (probably shouldn't). The second line break
// discards all whitespace. The result should probably be vec!["a"] but it doesn't matter
// that much.
assert_eq!(word_wrapper, vec!["a", ""]);
assert_eq!(line_truncator, vec!["a "]);
}
#[test]
fn line_composer_word_wrapper_double_width_chars_mixed_with_spaces() {
let width = 20;
// Japanese seems not to use spaces but we should break on spaces anyway... We're using it
// to test double-width chars.
// You are more than welcome to add word boundary detection based of alterations of
// hiragana and katakana...
// This happens to also be a test case for mixed width because regular spaces are single width.
let text = "コンピュ ータ上で文字を扱う場合、 典型的には文 字による 通信を行 う場合にその両端点では、";
let (word_wrapper, word_wrapper_width) =
run_composer(Composer::WordWrapper { trim: true }, text, width);
assert_eq!(
word_wrapper,
vec![
"コンピュ",
"ータ上で文字を扱う場",
"合、 典型的には文",
"字による 通信を行",
"う場合にその両端点で",
"は、",
]
);
// Odd-sized lines have a space in them.
assert_eq!(word_wrapper_width, vec![8, 20, 17, 17, 20, 4]);
}
/// Ensure words separated by nbsp are wrapped as if they were a single one.
#[test]
fn line_composer_word_wrapper_nbsp() {
let width = 20;
let text = "AAAAAAAAAAAAAAA AAAA\u{00a0}AAA";
let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: true }, text, width);
assert_eq!(word_wrapper, vec!["AAAAAAAAAAAAAAA", "AAAA\u{00a0}AAA",]);
// Ensure that if the character was a regular space, it would be wrapped differently.
let text_space = text.replace('\u{00a0}', " ");
let (word_wrapper_space, _) =
run_composer(Composer::WordWrapper { trim: true }, &text_space, width);
assert_eq!(word_wrapper_space, vec!["AAAAAAAAAAAAAAA AAAA", "AAA",]);
}
#[test]
fn line_composer_word_wrapper_preserve_indentation() {
let width = 20;
let text = "AAAAAAAAAAAAAAAAAAAA AAA";
let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: false }, text, width);
assert_eq!(word_wrapper, vec!["AAAAAAAAAAAAAAAAAAAA", " AAA",]);
}
#[test]
fn line_composer_word_wrapper_preserve_indentation_with_wrap() {
let width = 10;
let text = "AAA AAA AAAAA AA AAAAAA\n B\n C\n D";
let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: false }, text, width);
assert_eq!(
word_wrapper,
vec!["AAA AAA", "AAAAA AA", "AAAAAA", " B", " C", " D"]
);
}
#[test]
fn line_composer_word_wrapper_preserve_indentation_lots_of_whitespace() {
let width = 10;
let text = " 4 Indent\n must wrap!";
let (word_wrapper, _) = run_composer(Composer::WordWrapper { trim: false }, text, width);
assert_eq!(
word_wrapper,
vec![
" ",
" 4",
"Indent",
" ",
" must",
"wrap!"
]
);
}
}
|