From a14291e4a31f6b159ee3080b34731ec793bdb263 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Wed, 16 Oct 2024 18:01:07 +0900 Subject: [PATCH 1/3] text_util: add elide_end() function This function mirrors elide_start(), literally. We don't have any callers for the moment, but it helps write tests of inner truncation helpers. I'm going to add bytes version of these functions to implement "truncate" template functions. --- cli/src/text_util.rs | 128 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/cli/src/text_util.rs b/cli/src/text_util.rs index 32c92dbc9b..5d0307efa7 100644 --- a/cli/src/text_util.rs +++ b/cli/src/text_util.rs @@ -66,6 +66,31 @@ pub fn elide_start<'a>( (Cow::Owned([ellipsis, text].concat()), concat_width) } +/// Shortens `text` to `max_width` by removing trailing characters. `ellipsis` +/// is added if the `text` gets truncated. +/// +/// The returned string (including `ellipsis`) never exceeds the `max_width`. +pub fn elide_end<'a>(text: &'a str, ellipsis: &'a str, max_width: usize) -> (Cow<'a, str>, usize) { + let (text_end, text_width) = truncate_end_pos(text, max_width); + if text_end == text.len() { + return (Cow::Borrowed(text), text_width); + } + + let (ellipsis_end, ellipsis_width) = truncate_end_pos(ellipsis, max_width); + if ellipsis_end != ellipsis.len() { + let ellipsis = &ellipsis[..ellipsis_end]; + return (Cow::Borrowed(ellipsis), ellipsis_width); + } + + let text = &text[..text_end]; + let max_text_width = max_width - ellipsis_width; + let (skip, skipped_width) = skip_end_pos(text, text_width.saturating_sub(max_text_width)); + let text = &text[..skip]; + let concat_width = (text_width - skipped_width) + ellipsis_width; + assert!(concat_width <= max_width); + (Cow::Owned([text, ellipsis].concat()), concat_width) +} + /// Shortens `text` to `max_width` by removing leading characters, returning /// `(start_index, width)`. 
/// @@ -83,6 +108,20 @@ fn truncate_start_pos(text: &str, max_width: usize) -> (usize, usize) { (0, acc_width) } +/// Shortens `text` to `max_width` by removing trailing characters, returning +/// `(end_index, width)`. +fn truncate_end_pos(text: &str, max_width: usize) -> (usize, usize) { + let mut acc_width = 0; + for (i, c) in text.char_indices() { + let new_width = acc_width + c.width().unwrap_or(0); + if new_width > max_width { + return (i, acc_width); + } + acc_width = new_width; + } + (text.len(), acc_width) +} + /// Skips `width` leading characters, returning `(start_index, skipped_width)`. /// /// The `skipped_width` may exceed the given `width` if `width` is not at @@ -100,6 +139,22 @@ fn skip_start_pos(text: &str, width: usize) -> (usize, usize) { (text.len(), acc_width) } +/// Skips `width` trailing characters, returning `(end_index, skipped_width)`. +/// +/// The `skipped_width` may exceed the given `width` if `width` is not at +/// character boundary. +fn skip_end_pos(text: &str, width: usize) -> (usize, usize) { + let mut acc_width = 0; + for (i, c) in text.char_indices().rev() { + if acc_width >= width { + let prev_index = i + c.len_utf8(); + return (prev_index, acc_width); + } + acc_width += c.width().unwrap_or(0); + } + (0, acc_width) +} + /// Removes leading 0-width characters. 
fn trim_start_zero_width_chars(text: &str) -> &str { text.trim_start_matches(|c: char| c.width().unwrap_or(0) == 0) @@ -370,6 +425,79 @@ mod tests { ); } + #[test] + fn test_elide_end() { + // Empty string + assert_eq!(elide_end("", "", 1), ("".into(), 0)); + + // Basic truncation + assert_eq!(elide_end("abcdef", "", 6), ("abcdef".into(), 6)); + assert_eq!(elide_end("abcdef", "", 5), ("abcde".into(), 5)); + assert_eq!(elide_end("abcdef", "", 1), ("a".into(), 1)); + assert_eq!(elide_end("abcdef", "", 0), ("".into(), 0)); + assert_eq!(elide_end("abcdef", "-=~", 6), ("abcdef".into(), 6)); + assert_eq!(elide_end("abcdef", "-=~", 5), ("ab-=~".into(), 5)); + assert_eq!(elide_end("abcdef", "-=~", 4), ("a-=~".into(), 4)); + assert_eq!(elide_end("abcdef", "-=~", 3), ("-=~".into(), 3)); + assert_eq!(elide_end("abcdef", "-=~", 2), ("-=".into(), 2)); + assert_eq!(elide_end("abcdef", "-=~", 1), ("-".into(), 1)); + assert_eq!(elide_end("abcdef", "-=~", 0), ("".into(), 0)); + + // East Asian characters (char.width() == 2) + assert_eq!(elide_end("一二三", "", 6), ("一二三".into(), 6)); + assert_eq!(elide_end("一二三", "", 5), ("一二".into(), 4)); + assert_eq!(elide_end("一二三", "", 4), ("一二".into(), 4)); + assert_eq!(elide_end("一二三", "", 1), ("".into(), 0)); + assert_eq!(elide_end("一二三", "-=~", 6), ("一二三".into(), 6)); + assert_eq!(elide_end("一二三", "-=~", 5), ("一-=~".into(), 5)); + assert_eq!(elide_end("一二三", "-=~", 4), ("-=~".into(), 3)); + assert_eq!(elide_end("一二三", "略", 6), ("一二三".into(), 6)); + assert_eq!(elide_end("一二三", "略", 5), ("一略".into(), 4)); + assert_eq!(elide_end("一二三", "略", 4), ("一略".into(), 4)); + assert_eq!(elide_end("一二三", "略", 2), ("略".into(), 2)); + assert_eq!(elide_end("一二三", "略", 1), ("".into(), 0)); + assert_eq!(elide_end("一二三", ".", 5), ("一二.".into(), 5)); + assert_eq!(elide_end("一二三", ".", 4), ("一.".into(), 3)); + assert_eq!(elide_end("一二三", "略.", 5), ("一略.".into(), 5)); + assert_eq!(elide_end("一二三", "略.", 4), ("略.".into(), 3)); + + // Multi-byte character at boundary + 
assert_eq!(elide_end("àbcdè", "", 5), ("àbcdè".into(), 5)); + assert_eq!(elide_end("àbcdè", "", 4), ("àbcd".into(), 4)); + assert_eq!(elide_end("àbcdè", "", 1), ("à".into(), 1)); + assert_eq!(elide_end("àbcdè", "", 0), ("".into(), 0)); + assert_eq!(elide_end("àbcdè", "ÀÇÈ", 4), ("àÀÇÈ".into(), 4)); + assert_eq!(elide_end("àbcdè", "ÀÇÈ", 3), ("ÀÇÈ".into(), 3)); + assert_eq!(elide_end("àbcdè", "ÀÇÈ", 2), ("ÀÇ".into(), 2)); + + // Decomposed character at boundary + assert_eq!( + elide_end("a\u{300}bcde\u{300}", "", 5), + ("a\u{300}bcde\u{300}".into(), 5) + ); + assert_eq!( + elide_end("a\u{300}bcde\u{300}", "", 4), + ("a\u{300}bcd".into(), 4) + ); + assert_eq!( + elide_end("a\u{300}bcde\u{300}", "", 1), + ("a\u{300}".into(), 1) + ); + assert_eq!(elide_end("a\u{300}bcde\u{300}", "", 0), ("".into(), 0)); + assert_eq!( + elide_end("a\u{300}bcde\u{300}", "A\u{300}CE\u{300}", 4), + ("a\u{300}A\u{300}CE\u{300}".into(), 4) + ); + assert_eq!( + elide_end("a\u{300}bcde\u{300}", "A\u{300}CE\u{300}", 3), + ("A\u{300}CE\u{300}".into(), 3) + ); + assert_eq!( + elide_end("a\u{300}bcde\u{300}", "A\u{300}CE\u{300}", 2), + ("A\u{300}C".into(), 2) + ); + } + #[test] fn test_split_byte_line_to_words() { assert_eq!(split_byte_line_to_words(b""), vec![]); From 7049216a4a9036cf8ebc685d814154cbcf772ec9 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Wed, 16 Oct 2024 18:26:18 +0900 Subject: [PATCH 2/3] text_util: extract generic truncation helpers to support &[u8] --- cli/src/text_util.rs | 62 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/cli/src/text_util.rs b/cli/src/text_util.rs index 5d0307efa7..cd7990ce24 100644 --- a/cli/src/text_util.rs +++ b/cli/src/text_util.rs @@ -96,12 +96,23 @@ pub fn elide_end<'a>(text: &'a str, ellipsis: &'a str, max_width: usize) -> (Cow /// /// The truncated string may have 0-width decomposed characters at start. 
fn truncate_start_pos(text: &str, max_width: usize) -> (usize, usize) { + truncate_start_pos_with_indices( + text.char_indices() + .rev() + .map(|(start, c)| (start + c.len_utf8(), c)), + max_width, + ) +} + +fn truncate_start_pos_with_indices( + char_indices_rev: impl Iterator<Item = (usize, char)>, + max_width: usize, +) -> (usize, usize) { let mut acc_width = 0; - for (i, c) in text.char_indices().rev() { + for (end, c) in char_indices_rev { let new_width = acc_width + c.width().unwrap_or(0); if new_width > max_width { - let prev_index = i + c.len_utf8(); - return (prev_index, acc_width); + return (end, acc_width); } acc_width = new_width; } @@ -111,15 +122,23 @@ fn truncate_start_pos(text: &str, max_width: usize) -> (usize, usize) { /// Shortens `text` to `max_width` by removing trailing characters, returning /// `(end_index, width)`. fn truncate_end_pos(text: &str, max_width: usize) -> (usize, usize) { + truncate_end_pos_with_indices(text.char_indices(), text.len(), max_width) +} + +fn truncate_end_pos_with_indices( + char_indices_fwd: impl Iterator<Item = (usize, char)>, + text_len: usize, + max_width: usize, +) -> (usize, usize) { let mut acc_width = 0; - for (i, c) in text.char_indices() { + for (start, c) in char_indices_fwd { let new_width = acc_width + c.width().unwrap_or(0); if new_width > max_width { - return (i, acc_width); + return (start, acc_width); } acc_width = new_width; } - (text.len(), acc_width) + (text_len, acc_width) } /// Skips `width` leading characters, returning `(start_index, skipped_width)`. @@ -129,14 +148,22 @@ fn truncate_end_pos(text: &str, max_width: usize) -> (usize, usize) { /// /// The truncated string may have 0-width decomposed characters at start. 
fn skip_start_pos(text: &str, width: usize) -> (usize, usize) { + skip_start_pos_with_indices(text.char_indices(), text.len(), width) +} + +fn skip_start_pos_with_indices( + char_indices_fwd: impl Iterator<Item = (usize, char)>, + text_len: usize, + width: usize, +) -> (usize, usize) { let mut acc_width = 0; - for (i, c) in text.char_indices() { + for (start, c) in char_indices_fwd { if acc_width >= width { - return (i, acc_width); + return (start, acc_width); } acc_width += c.width().unwrap_or(0); } - (text.len(), acc_width) + (text_len, acc_width) } /// Skips `width` trailing characters, returning `(end_index, skipped_width)`. @@ -144,11 +171,22 @@ fn skip_start_pos(text: &str, width: usize) -> (usize, usize) { /// The `skipped_width` may exceed the given `width` if `width` is not at /// character boundary. fn skip_end_pos(text: &str, width: usize) -> (usize, usize) { + skip_end_pos_with_indices( + text.char_indices() + .rev() + .map(|(start, c)| (start + c.len_utf8(), c)), + width, + ) +} + +fn skip_end_pos_with_indices( + char_indices_rev: impl Iterator<Item = (usize, char)>, + width: usize, +) -> (usize, usize) { let mut acc_width = 0; - for (i, c) in text.char_indices().rev() { + for (end, c) in char_indices_rev { if acc_width >= width { - let prev_index = i + c.len_utf8(); - return (prev_index, acc_width); + return (end, acc_width); } acc_width += c.width().unwrap_or(0); } From c7f8c7c24a78d6e05ed201a4a065e254b0b73fc7 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Wed, 16 Oct 2024 18:40:37 +0900 Subject: [PATCH 3/3] text_util: add functions to truncate labeled text This will be used by truncate_start/end() template functions. I considered adding a template function that supports both padding and truncation, but the function interface looked a bit messy. There may be (max_width, ellipsis, left|middle|right) parameters for truncation, and (min_width, fill_char, left|center|right) for padding. 
I'm not going to add ellipsis and centering support, but it's weird if pad(center) implied truncate(middle). --- cli/src/text_util.rs | 204 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) diff --git a/cli/src/text_util.rs b/cli/src/text_util.rs index cd7990ce24..e44ee19ce5 100644 --- a/cli/src/text_util.rs +++ b/cli/src/text_util.rs @@ -16,6 +16,7 @@ use std::borrow::Cow; use std::cmp; use std::io; +use bstr::ByteSlice as _; use unicode_width::UnicodeWidthChar as _; use crate::formatter::FormatRecorder; @@ -104,6 +105,13 @@ fn truncate_start_pos(text: &str, max_width: usize) -> (usize, usize) { ) } +fn truncate_start_pos_bytes(text: &[u8], max_width: usize) -> (usize, usize) { + truncate_start_pos_with_indices( + text.char_indices().rev().map(|(_, end, c)| (end, c)), + max_width, + ) +} + fn truncate_start_pos_with_indices( char_indices_rev: impl Iterator, max_width: usize, @@ -125,6 +133,14 @@ fn truncate_end_pos(text: &str, max_width: usize) -> (usize, usize) { truncate_end_pos_with_indices(text.char_indices(), text.len(), max_width) } +fn truncate_end_pos_bytes(text: &[u8], max_width: usize) -> (usize, usize) { + truncate_end_pos_with_indices( + text.char_indices().map(|(start, _, c)| (start, c)), + text.len(), + max_width, + ) +} + fn truncate_end_pos_with_indices( char_indices_fwd: impl Iterator, text_len: usize, @@ -198,6 +214,57 @@ fn trim_start_zero_width_chars(text: &str) -> &str { text.trim_start_matches(|c: char| c.width().unwrap_or(0) == 0) } +/// Returns bytes length of leading 0-width characters. +fn count_start_zero_width_chars_bytes(text: &[u8]) -> usize { + text.char_indices() + .find(|(_, _, c)| c.width().unwrap_or(0) != 0) + .map(|(start, _, _)| start) + .unwrap_or(text.len()) +} + +/// Writes text truncated to `max_width` by removing leading characters. Returns +/// width of the truncated text, which may be shorter than `max_width`. +/// +/// The input `recorded_content` should be a single-line text. 
+pub fn write_truncated_start( + formatter: &mut dyn Formatter, + recorded_content: &FormatRecorder, + max_width: usize, +) -> io::Result<usize> { + let data = recorded_content.data(); + let (start, truncated_width) = truncate_start_pos_bytes(data, max_width); + let truncated_start = start + count_start_zero_width_chars_bytes(&data[start..]); + recorded_content.replay_with(formatter, |formatter, range| { + let start = cmp::max(range.start, truncated_start); + if start < range.end { + formatter.write_all(&data[start..range.end])?; + } + Ok(()) + })?; + Ok(truncated_width) +} + +/// Writes text truncated to `max_width` by removing trailing characters. +/// Returns width of the truncated text, which may be shorter than `max_width`. +/// +/// The input `recorded_content` should be a single-line text. +pub fn write_truncated_end( + formatter: &mut dyn Formatter, + recorded_content: &FormatRecorder, + max_width: usize, +) -> io::Result<usize> { + let data = recorded_content.data(); + let (truncated_end, truncated_width) = truncate_end_pos_bytes(data, max_width); + recorded_content.replay_with(formatter, |formatter, range| { + let end = cmp::min(range.end, truncated_end); + if range.start < end { + formatter.write_all(&data[range.start..end])?; + } + Ok(()) + })?; + Ok(truncated_width) +} + /// Indents each line by the given prefix preserving labels. 
pub fn write_indented( formatter: &mut dyn Formatter, @@ -536,6 +603,143 @@ mod tests { ); } + #[test] + fn test_write_truncated_labeled() { + let mut recorder = FormatRecorder::new(); + for (label, word) in [("red", "foo"), ("cyan", "bar")] { + recorder.push_label(label).unwrap(); + write!(recorder, "{word}").unwrap(); + recorder.pop_label().unwrap(); + } + + // Truncate start + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 6).map(|_| ())), + @"foobar" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 5).map(|_| ())), + @"oobar" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 3).map(|_| ())), + @"bar" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 2).map(|_| ())), + @"ar" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 0).map(|_| ())), + @"" + ); + + // Truncate end + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_end(formatter, &recorder, 6).map(|_| ())), + @"foobar" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_end(formatter, &recorder, 5).map(|_| ())), + @"fooba" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_end(formatter, &recorder, 3).map(|_| ())), + @"foo" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_end(formatter, &recorder, 2).map(|_| ())), + @"fo" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_end(formatter, &recorder, 0).map(|_| ())), + @"" + ); + } + + #[test] + fn test_write_truncated_non_ascii_chars() { + let mut recorder = FormatRecorder::new(); + write!(recorder, "a\u{300}bc\u{300}一二三").unwrap(); + + // Truncate start + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 1).map(|_| 
())), + @"" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 2).map(|_| ())), + @"三" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 3).map(|_| ())), + @"三" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 6).map(|_| ())), + @"一二三" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 7).map(|_| ())), + @"c̀一二三" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 9).map(|_| ())), + @"àbc̀一二三" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 10).map(|_| ())), + @"àbc̀一二三" + ); + + // Truncate end + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_end(formatter, &recorder, 1).map(|_| ())), + @"à" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_end(formatter, &recorder, 4).map(|_| ())), + @"àbc̀" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_end(formatter, &recorder, 5).map(|_| ())), + @"àbc̀一" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_end(formatter, &recorder, 9).map(|_| ())), + @"àbc̀一二三" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_end(formatter, &recorder, 10).map(|_| ())), + @"àbc̀一二三" + ); + } + + #[test] + fn test_write_truncated_empty_content() { + let recorder = FormatRecorder::new(); + + // Truncate start + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 0).map(|_| ())), + @"" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_start(formatter, &recorder, 1).map(|_| ())), + @"" + ); + + // Truncate end + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_end(formatter, &recorder, 0).map(|_| ())), + 
@"" + ); + insta::assert_snapshot!( + format_colored(|formatter| write_truncated_end(formatter, &recorder, 1).map(|_| ())), + @"" + ); + } + #[test] fn test_split_byte_line_to_words() { assert_eq!(split_byte_line_to_words(b""), vec![]);