From a14291e4a31f6b159ee3080b34731ec793bdb263 Mon Sep 17 00:00:00 2001
From: Yuya Nishihara <yuya@tcha.org>
Date: Wed, 16 Oct 2024 18:01:07 +0900
Subject: [PATCH 1/3] text_util: add elide_end() function

This function mirrors elide_start(), literally. It doesn't have any callers
for the moment, but it helps to write tests of the inner truncation helpers.
I'm going to add bytes versions of these helpers to implement the "truncate"
template functions.
---
 cli/src/text_util.rs | 128 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)

diff --git a/cli/src/text_util.rs b/cli/src/text_util.rs
index 32c92dbc9b..5d0307efa7 100644
--- a/cli/src/text_util.rs
+++ b/cli/src/text_util.rs
@@ -66,6 +66,31 @@ pub fn elide_start<'a>(
     (Cow::Owned([ellipsis, text].concat()), concat_width)
 }
 
+/// Cuts trailing characters off `text` so that it fits in `max_width`, and
+/// appends `ellipsis` if anything was cut. If `ellipsis` itself is too wide,
+/// only the truncated `ellipsis` is returned.
+///
+/// Returns the string and its width, which never exceeds the `max_width`.
+pub fn elide_end<'a>(text: &'a str, ellipsis: &'a str, max_width: usize) -> (Cow<'a, str>, usize) {
+    let (fit_end, fit_width) = truncate_end_pos(text, max_width);
+    if fit_end == text.len() {
+        return (Cow::Borrowed(text), fit_width);
+    }
+
+    let (mark_end, mark_width) = truncate_end_pos(ellipsis, max_width);
+    if mark_end != ellipsis.len() {
+        return (Cow::Borrowed(&ellipsis[..mark_end]), mark_width);
+    }
+
+    // Give up trailing columns of the fitting prefix to make room for `ellipsis`.
+    let head = &text[..fit_end];
+    let excess_width = fit_width.saturating_sub(max_width - mark_width);
+    let (kept_end, dropped_width) = skip_end_pos(head, excess_width);
+    let concat_width = (fit_width - dropped_width) + mark_width;
+    assert!(concat_width <= max_width);
+    (Cow::Owned([&head[..kept_end], ellipsis].concat()), concat_width)
+}
+
 /// Shortens `text` to `max_width` by removing leading characters, returning
 /// `(start_index, width)`.
 ///
@@ -83,6 +108,20 @@ fn truncate_start_pos(text: &str, max_width: usize) -> (usize, usize) {
     (0, acc_width)
 }
 
+/// Shortens `text` to `max_width` by removing trailing characters, returning
+/// `(end_index, width)`.
+fn truncate_end_pos(text: &str, max_width: usize) -> (usize, usize) {
+    let mut acc_width = 0;
+    for (i, c) in text.char_indices() {
+        let new_width = acc_width + c.width().unwrap_or(0);
+        if new_width > max_width {
+            return (i, acc_width);
+        }
+        acc_width = new_width;
+    }
+    (text.len(), acc_width)
+}
+
 /// Skips `width` leading characters, returning `(start_index, skipped_width)`.
 ///
 /// The `skipped_width` may exceed the given `width` if `width` is not at
@@ -100,6 +139,22 @@ fn skip_start_pos(text: &str, width: usize) -> (usize, usize) {
     (text.len(), acc_width)
 }
 
+/// Skips `width` trailing characters, returning `(end_index, skipped_width)`.
+///
+/// The `skipped_width` may exceed the given `width` if `width` is not at
+/// character boundary.
+fn skip_end_pos(text: &str, width: usize) -> (usize, usize) {
+    let mut acc_width = 0;
+    for (i, c) in text.char_indices().rev() {
+        if acc_width >= width {
+            let prev_index = i + c.len_utf8();
+            return (prev_index, acc_width);
+        }
+        acc_width += c.width().unwrap_or(0);
+    }
+    (0, acc_width)
+}
+
 /// Removes leading 0-width characters.
 fn trim_start_zero_width_chars(text: &str) -> &str {
     text.trim_start_matches(|c: char| c.width().unwrap_or(0) == 0)
@@ -370,6 +425,79 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_elide_end() {
+        // Empty text and ellipsis: nothing to truncate
+        assert_eq!(elide_end("", "", 1), ("".into(), 0));
+
+        // ASCII text, with and without an ellipsis
+        assert_eq!(elide_end("abcdef", "", 6), ("abcdef".into(), 6));
+        assert_eq!(elide_end("abcdef", "", 5), ("abcde".into(), 5));
+        assert_eq!(elide_end("abcdef", "", 1), ("a".into(), 1));
+        assert_eq!(elide_end("abcdef", "", 0), ("".into(), 0));
+        assert_eq!(elide_end("abcdef", "-=~", 6), ("abcdef".into(), 6));
+        assert_eq!(elide_end("abcdef", "-=~", 5), ("ab-=~".into(), 5));
+        assert_eq!(elide_end("abcdef", "-=~", 4), ("a-=~".into(), 4));
+        assert_eq!(elide_end("abcdef", "-=~", 3), ("-=~".into(), 3));
+        assert_eq!(elide_end("abcdef", "-=~", 2), ("-=".into(), 2));
+        assert_eq!(elide_end("abcdef", "-=~", 1), ("-".into(), 1));
+        assert_eq!(elide_end("abcdef", "-=~", 0), ("".into(), 0));
+
+        // East Asian characters (char.width() == 2) are never split in half
+        assert_eq!(elide_end("一二三", "", 6), ("一二三".into(), 6));
+        assert_eq!(elide_end("一二三", "", 5), ("一二".into(), 4));
+        assert_eq!(elide_end("一二三", "", 4), ("一二".into(), 4));
+        assert_eq!(elide_end("一二三", "", 1), ("".into(), 0));
+        assert_eq!(elide_end("一二三", "-=~", 6), ("一二三".into(), 6));
+        assert_eq!(elide_end("一二三", "-=~", 5), ("一-=~".into(), 5));
+        assert_eq!(elide_end("一二三", "-=~", 4), ("-=~".into(), 3));
+        assert_eq!(elide_end("一二三", "略", 6), ("一二三".into(), 6));
+        assert_eq!(elide_end("一二三", "略", 5), ("一略".into(), 4));
+        assert_eq!(elide_end("一二三", "略", 4), ("一略".into(), 4));
+        assert_eq!(elide_end("一二三", "略", 2), ("略".into(), 2));
+        assert_eq!(elide_end("一二三", "略", 1), ("".into(), 0));
+        assert_eq!(elide_end("一二三", ".", 5), ("一二.".into(), 5));
+        assert_eq!(elide_end("一二三", ".", 4), ("一.".into(), 3));
+        assert_eq!(elide_end("一二三", "略.", 5), ("一略.".into(), 5));
+        assert_eq!(elide_end("一二三", "略.", 4), ("略.".into(), 3));
+
+        // Multi-byte (1-width) characters at the truncation boundary
+        assert_eq!(elide_end("àbcdè", "", 5), ("àbcdè".into(), 5));
+        assert_eq!(elide_end("àbcdè", "", 4), ("àbcd".into(), 4));
+        assert_eq!(elide_end("àbcdè", "", 1), ("à".into(), 1));
+        assert_eq!(elide_end("àbcdè", "", 0), ("".into(), 0));
+        assert_eq!(elide_end("àbcdè", "ÀÇÈ", 4), ("àÀÇÈ".into(), 4));
+        assert_eq!(elide_end("àbcdè", "ÀÇÈ", 3), ("ÀÇÈ".into(), 3));
+        assert_eq!(elide_end("àbcdè", "ÀÇÈ", 2), ("ÀÇ".into(), 2));
+
+        // Decomposed characters: 0-width combining marks stay with their base
+        assert_eq!(
+            elide_end("a\u{300}bcde\u{300}", "", 5),
+            ("a\u{300}bcde\u{300}".into(), 5)
+        );
+        assert_eq!(
+            elide_end("a\u{300}bcde\u{300}", "", 4),
+            ("a\u{300}bcd".into(), 4)
+        );
+        assert_eq!(
+            elide_end("a\u{300}bcde\u{300}", "", 1),
+            ("a\u{300}".into(), 1)
+        );
+        assert_eq!(elide_end("a\u{300}bcde\u{300}", "", 0), ("".into(), 0));
+        assert_eq!(
+            elide_end("a\u{300}bcde\u{300}", "A\u{300}CE\u{300}", 4),
+            ("a\u{300}A\u{300}CE\u{300}".into(), 4)
+        );
+        assert_eq!(
+            elide_end("a\u{300}bcde\u{300}", "A\u{300}CE\u{300}", 3),
+            ("A\u{300}CE\u{300}".into(), 3)
+        );
+        assert_eq!(
+            elide_end("a\u{300}bcde\u{300}", "A\u{300}CE\u{300}", 2),
+            ("A\u{300}C".into(), 2)
+        );
+    }
+
     #[test]
     fn test_split_byte_line_to_words() {
         assert_eq!(split_byte_line_to_words(b""), vec![]);

From 7049216a4a9036cf8ebc685d814154cbcf772ec9 Mon Sep 17 00:00:00 2001
From: Yuya Nishihara <yuya@tcha.org>
Date: Wed, 16 Oct 2024 18:26:18 +0900
Subject: [PATCH 2/3] text_util: extract generic truncation helpers to support
 &[u8]

---
 cli/src/text_util.rs | 62 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 50 insertions(+), 12 deletions(-)

diff --git a/cli/src/text_util.rs b/cli/src/text_util.rs
index 5d0307efa7..cd7990ce24 100644
--- a/cli/src/text_util.rs
+++ b/cli/src/text_util.rs
@@ -96,12 +96,23 @@ pub fn elide_end<'a>(text: &'a str, ellipsis: &'a str, max_width: usize) -> (Cow
 ///
 /// The truncated string may have 0-width decomposed characters at start.
 fn truncate_start_pos(text: &str, max_width: usize) -> (usize, usize) {
+    truncate_start_pos_with_indices(
+        text.char_indices()
+            .rev()
+            .map(|(start, c)| (start + c.len_utf8(), c)),
+        max_width,
+    )
+}
+
+fn truncate_start_pos_with_indices(
+    char_indices_rev: impl Iterator<Item = (usize, char)>,
+    max_width: usize,
+) -> (usize, usize) {
     let mut acc_width = 0;
-    for (i, c) in text.char_indices().rev() {
+    for (end, c) in char_indices_rev {
         let new_width = acc_width + c.width().unwrap_or(0);
         if new_width > max_width {
-            let prev_index = i + c.len_utf8();
-            return (prev_index, acc_width);
+            return (end, acc_width);
         }
         acc_width = new_width;
     }
@@ -111,15 +122,23 @@ fn truncate_start_pos(text: &str, max_width: usize) -> (usize, usize) {
 /// Shortens `text` to `max_width` by removing trailing characters, returning
 /// `(end_index, width)`.
 fn truncate_end_pos(text: &str, max_width: usize) -> (usize, usize) {
+    truncate_end_pos_with_indices(text.char_indices(), text.len(), max_width)
+}
+
+fn truncate_end_pos_with_indices(
+    char_indices_fwd: impl Iterator<Item = (usize, char)>,
+    text_len: usize,
+    max_width: usize,
+) -> (usize, usize) {
     let mut acc_width = 0;
-    for (i, c) in text.char_indices() {
+    for (start, c) in char_indices_fwd {
         let new_width = acc_width + c.width().unwrap_or(0);
         if new_width > max_width {
-            return (i, acc_width);
+            return (start, acc_width);
         }
         acc_width = new_width;
     }
-    (text.len(), acc_width)
+    (text_len, acc_width)
 }
 
 /// Skips `width` leading characters, returning `(start_index, skipped_width)`.
@@ -129,14 +148,22 @@ fn truncate_end_pos(text: &str, max_width: usize) -> (usize, usize) {
 ///
 /// The truncated string may have 0-width decomposed characters at start.
 fn skip_start_pos(text: &str, width: usize) -> (usize, usize) {
+    skip_start_pos_with_indices(text.char_indices(), text.len(), width)
+}
+
+fn skip_start_pos_with_indices(
+    char_indices_fwd: impl Iterator<Item = (usize, char)>,
+    text_len: usize,
+    width: usize,
+) -> (usize, usize) {
     let mut acc_width = 0;
-    for (i, c) in text.char_indices() {
+    for (start, c) in char_indices_fwd {
         if acc_width >= width {
-            return (i, acc_width);
+            return (start, acc_width);
         }
         acc_width += c.width().unwrap_or(0);
     }
-    (text.len(), acc_width)
+    (text_len, acc_width)
 }
 
 /// Skips `width` trailing characters, returning `(end_index, skipped_width)`.
@@ -144,11 +171,22 @@ fn skip_start_pos(text: &str, width: usize) -> (usize, usize) {
 /// The `skipped_width` may exceed the given `width` if `width` is not at
 /// character boundary.
 fn skip_end_pos(text: &str, width: usize) -> (usize, usize) {
+    skip_end_pos_with_indices(
+        text.char_indices()
+            .rev()
+            .map(|(start, c)| (start + c.len_utf8(), c)),
+        width,
+    )
+}
+
+fn skip_end_pos_with_indices(
+    char_indices_rev: impl Iterator<Item = (usize, char)>,
+    width: usize,
+) -> (usize, usize) {
     let mut acc_width = 0;
-    for (i, c) in text.char_indices().rev() {
+    for (end, c) in char_indices_rev {
         if acc_width >= width {
-            let prev_index = i + c.len_utf8();
-            return (prev_index, acc_width);
+            return (end, acc_width);
         }
         acc_width += c.width().unwrap_or(0);
     }

From c7f8c7c24a78d6e05ed201a4a065e254b0b73fc7 Mon Sep 17 00:00:00 2001
From: Yuya Nishihara <yuya@tcha.org>
Date: Wed, 16 Oct 2024 18:40:37 +0900
Subject: [PATCH 3/3] text_util: add functions to truncate labeled text

This will be used by truncate_start/end() template functions. I considered
adding a template function that supports both padding and truncation, but the
function interface looked a bit messy. There would be (max_width, ellipsis,
left|middle|right) parameters for truncation, and (min_width, fill_char,
left|center|right) for padding. I'm not going to add ellipsis and centering
support, but it would be weird if pad(center) implied truncate(middle).
---
 cli/src/text_util.rs | 204 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 204 insertions(+)

diff --git a/cli/src/text_util.rs b/cli/src/text_util.rs
index cd7990ce24..e44ee19ce5 100644
--- a/cli/src/text_util.rs
+++ b/cli/src/text_util.rs
@@ -16,6 +16,7 @@ use std::borrow::Cow;
 use std::cmp;
 use std::io;
 
+use bstr::ByteSlice as _;
 use unicode_width::UnicodeWidthChar as _;
 
 use crate::formatter::FormatRecorder;
@@ -104,6 +105,13 @@ fn truncate_start_pos(text: &str, max_width: usize) -> (usize, usize) {
     )
 }
 
+/// Bytes version of `truncate_start_pos()`; returns `(start_index, width)`.
+fn truncate_start_pos_bytes(text: &[u8], max_width: usize) -> (usize, usize) {
+    // Walk backwards, pairing each char with the byte offset just past it.
+    let char_ends_rev = text.char_indices().rev().map(|(_, end, ch)| (end, ch));
+    truncate_start_pos_with_indices(char_ends_rev, max_width)
+}
+
 fn truncate_start_pos_with_indices(
     char_indices_rev: impl Iterator<Item = (usize, char)>,
     max_width: usize,
@@ -125,6 +133,14 @@ fn truncate_end_pos(text: &str, max_width: usize) -> (usize, usize) {
     truncate_end_pos_with_indices(text.char_indices(), text.len(), max_width)
 }
 
+/// Shortens bytes `text` to `max_width` by removing trailing characters,
+/// returning `(end_index, width)`.
+fn truncate_end_pos_bytes(text: &[u8], max_width: usize) -> (usize, usize) {
+    // Only the start offset of each char matters when cutting at the end.
+    let char_starts = text.char_indices().map(|(start, _, ch)| (start, ch));
+    truncate_end_pos_with_indices(char_starts, text.len(), max_width)
+}
+
 fn truncate_end_pos_with_indices(
     char_indices_fwd: impl Iterator<Item = (usize, char)>,
     text_len: usize,
@@ -198,6 +214,57 @@ fn trim_start_zero_width_chars(text: &str) -> &str {
     text.trim_start_matches(|c: char| c.width().unwrap_or(0) == 0)
 }
 
+/// Returns the length in bytes of the leading run of 0-width characters.
+fn count_start_zero_width_chars_bytes(text: &[u8]) -> usize {
+    // The run ends where the first char with a non-zero width starts.
+    let visible = |&(_, _, ch): &(usize, usize, char)| ch.width().unwrap_or(0) != 0;
+    let first_visible = text.char_indices().find(visible);
+    first_visible.map_or(text.len(), |(start, _, _)| start)
+}
+
+/// Writes `recorded_content` to `formatter` with leading characters removed so
+/// that it fits in `max_width`. Returns the width of the text kept, which may
+/// be shorter than `max_width`. `recorded_content` should be a single line.
+pub fn write_truncated_start(
+    formatter: &mut dyn Formatter,
+    recorded_content: &FormatRecorder,
+    max_width: usize,
+) -> io::Result<usize> {
+    let data = recorded_content.data();
+    let (cut, kept_width) = truncate_start_pos_bytes(data, max_width);
+    let first_kept = cut + count_start_zero_width_chars_bytes(&data[cut..]);
+    recorded_content.replay_with(formatter, |formatter, range| {
+        let from = cmp::max(range.start, first_kept);
+        if from >= range.end {
+            return Ok(());
+        }
+        formatter.write_all(&data[from..range.end])?;
+        Ok(())
+    })?;
+    Ok(kept_width)
+}
+
+/// Writes `recorded_content` to `formatter` with trailing characters removed so
+/// that it fits in `max_width`. Returns the width of the text kept, which may
+/// be shorter than `max_width`. `recorded_content` should be a single line.
+pub fn write_truncated_end(
+    formatter: &mut dyn Formatter,
+    recorded_content: &FormatRecorder,
+    max_width: usize,
+) -> io::Result<usize> {
+    let data = recorded_content.data();
+    let (cut, kept_width) = truncate_end_pos_bytes(data, max_width);
+    recorded_content.replay_with(formatter, |formatter, range| {
+        let until = cmp::min(range.end, cut);
+        if range.start >= until {
+            return Ok(());
+        }
+        formatter.write_all(&data[range.start..until])?;
+        Ok(())
+    })?;
+    Ok(kept_width)
+}
+
 /// Indents each line by the given prefix preserving labels.
 pub fn write_indented(
     formatter: &mut dyn Formatter,
@@ -536,6 +603,143 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_write_truncated_labeled() {
+        let mut recorder = FormatRecorder::new();
+        for (label, word) in [("red", "foo"), ("cyan", "bar")] {
+            recorder.push_label(label).unwrap();
+            write!(recorder, "{word}").unwrap();
+            recorder.pop_label().unwrap();
+        }
+
+        // Truncate start
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 6).map(|_| ())),
+            @"[38;5;1mfoo[39m[38;5;6mbar[39m"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 5).map(|_| ())),
+            @"[38;5;1moo[39m[38;5;6mbar[39m"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 3).map(|_| ())),
+            @"[38;5;6mbar[39m"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 2).map(|_| ())),
+            @"[38;5;6mar[39m"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 0).map(|_| ())),
+            @""
+        );
+
+        // Truncate end
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_end(formatter, &recorder, 6).map(|_| ())),
+            @"[38;5;1mfoo[39m[38;5;6mbar[39m"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_end(formatter, &recorder, 5).map(|_| ())),
+            @"[38;5;1mfoo[39m[38;5;6mba[39m"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_end(formatter, &recorder, 3).map(|_| ())),
+            @"[38;5;1mfoo[39m"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_end(formatter, &recorder, 2).map(|_| ())),
+            @"[38;5;1mfo[39m"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_end(formatter, &recorder, 0).map(|_| ())),
+            @""
+        );
+    }
+
+    #[test]
+    fn test_write_truncated_non_ascii_chars() {
+        let mut recorder = FormatRecorder::new();
+        write!(recorder, "a\u{300}bc\u{300}一二三").unwrap();
+
+        // Truncate start
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 1).map(|_| ())),
+            @""
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 2).map(|_| ())),
+            @"三"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 3).map(|_| ())),
+            @"三"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 6).map(|_| ())),
+            @"一二三"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 7).map(|_| ())),
+            @"c̀一二三"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 9).map(|_| ())),
+            @"àbc̀一二三"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 10).map(|_| ())),
+            @"àbc̀一二三"
+        );
+
+        // Truncate end
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_end(formatter, &recorder, 1).map(|_| ())),
+            @"à"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_end(formatter, &recorder, 4).map(|_| ())),
+            @"àbc̀"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_end(formatter, &recorder, 5).map(|_| ())),
+            @"àbc̀一"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_end(formatter, &recorder, 9).map(|_| ())),
+            @"àbc̀一二三"
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_end(formatter, &recorder, 10).map(|_| ())),
+            @"àbc̀一二三"
+        );
+    }
+
+    #[test]
+    fn test_write_truncated_empty_content() {
+        let recorder = FormatRecorder::new();
+
+        // Truncate start: an empty recording yields empty output at any width
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 0).map(|_| ())),
+            @""
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_start(formatter, &recorder, 1).map(|_| ())),
+            @""
+        );
+
+        // Truncate end: an empty recording yields empty output at any width
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_end(formatter, &recorder, 0).map(|_| ())),
+            @""
+        );
+        insta::assert_snapshot!(
+            format_colored(|formatter| write_truncated_end(formatter, &recorder, 1).map(|_| ())),
+            @""
+        );
+    }
+
     #[test]
     fn test_split_byte_line_to_words() {
         assert_eq!(split_byte_line_to_words(b""), vec![]);