Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.DS_Store
*.out
*.test
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
[Compare](https://github.com/clipperhouse/displaywidth/compare/v0.9.0...v0.10.0)

### Added
- New `IgnoreControlSequences` option to treat ECMA-48/ANSI escape sequences as zero-width. (#20)
- New `ControlSequences` option to treat ECMA-48/ANSI escape sequences as zero-width. (#20)
- `TruncateString` and `TruncateBytes` now preserve trailing ANSI escape sequences (such as SGR resets) when `ControlSequences` is true, preventing color bleed in terminal output.

### Changed
- Removed `stringish` dependency; generic type constraints are now inline `~string | []byte`.
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,15 @@ Create the options you need, and then use methods on the options struct.
```go
var myOptions = displaywidth.Options{
EastAsianWidth: true,
IgnoreControlSequences: true,
ControlSequences: true,
}

width := myOptions.String("Hello, 世界!")
```

#### IgnoreControlSequences
#### ControlSequences

`IgnoreControlSequences` specifies whether to ignore ECMA-48 escape sequences
`ControlSequences` specifies whether to ignore ECMA-48 escape sequences
when calculating the display width. When `false` (default), ANSI escape
sequences are treated as just a series of characters. When `true`, they are
treated as a single zero-width unit.
Expand Down
26 changes: 13 additions & 13 deletions fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ func FuzzBytesAndString(f *testing.F) {
options := []Options{
{EastAsianWidth: false},
{EastAsianWidth: true},
{IgnoreControlSequences: true},
{EastAsianWidth: true, IgnoreControlSequences: true},
{ControlSequences: true},
{EastAsianWidth: true, ControlSequences: true},
}

for _, option := range options {
Expand Down Expand Up @@ -190,13 +190,13 @@ func FuzzRune(f *testing.F) {
}
}

// Test with different options (Rune is per-rune, IgnoreControlSequences
// Test with different options (Rune is per-rune, ControlSequences
// doesn't affect single runes, but we include it for completeness)
options := []Options{
{EastAsianWidth: false},
{EastAsianWidth: true},
{IgnoreControlSequences: true},
{EastAsianWidth: true, IgnoreControlSequences: true},
{ControlSequences: true},
{EastAsianWidth: true, ControlSequences: true},
}

for _, option := range options {
Expand Down Expand Up @@ -315,8 +315,8 @@ func FuzzTruncateStringAndBytes(f *testing.F) {
options := []Options{
{EastAsianWidth: false},
{EastAsianWidth: true},
{IgnoreControlSequences: true},
{EastAsianWidth: true, IgnoreControlSequences: true},
{ControlSequences: true},
{EastAsianWidth: true, ControlSequences: true},
}

for _, option := range options {
Expand All @@ -336,7 +336,7 @@ func FuzzTruncateStringAndBytes(f *testing.F) {
}

// FuzzControlSequences fuzzes strings containing ANSI/ECMA-48 escape sequences
// across all option combinations (EastAsianWidth x IgnoreControlSequences).
// across all option combinations (EastAsianWidth x ControlSequences).
func FuzzControlSequences(f *testing.F) {
if testing.Short() {
f.Skip("skipping fuzz test in short mode")
Expand Down Expand Up @@ -382,8 +382,8 @@ func FuzzControlSequences(f *testing.F) {
allOptions := []Options{
{},
{EastAsianWidth: true},
{IgnoreControlSequences: true},
{EastAsianWidth: true, IgnoreControlSequences: true},
{ControlSequences: true},
{EastAsianWidth: true, ControlSequences: true},
}

f.Fuzz(func(t *testing.T, text []byte) {
Expand Down Expand Up @@ -430,13 +430,13 @@ func FuzzControlSequences(f *testing.F) {
t.Errorf("sum of StringGraphemes widths %d != String() %d with %+v for %q", sgSum, ws, opt, text)
}

// Invariant: IgnoreControlSequences width <= default width
// Invariant: ControlSequences width <= default width
// (escape sequences become 0 instead of their visible char widths)
if opt.IgnoreControlSequences {
if opt.ControlSequences {
noIgnore := Options{EastAsianWidth: opt.EastAsianWidth}
wDefault := noIgnore.Bytes(text)
if wb > wDefault {
t.Errorf("IgnoreControlSequences width %d > default width %d with %+v for %q", wb, wDefault, opt, text)
t.Errorf("ControlSequences width %d > default width %d with %+v for %q", wb, wDefault, opt, text)
}
}

Expand Down
4 changes: 2 additions & 2 deletions graphemes.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func StringGraphemes(s string) Graphemes[string] {
// using the Width method.
func (options Options) StringGraphemes(s string) Graphemes[string] {
g := graphemes.FromString(s)
g.AnsiEscapeSequences = options.IgnoreControlSequences
g.AnsiEscapeSequences = options.ControlSequences

return Graphemes[string]{iter: g, options: options}
}
Expand All @@ -65,7 +65,7 @@ func BytesGraphemes(s []byte) Graphemes[[]byte] {
// using the Width method.
func (options Options) BytesGraphemes(s []byte) Graphemes[[]byte] {
g := graphemes.FromBytes(s)
g.AnsiEscapeSequences = options.IgnoreControlSequences
g.AnsiEscapeSequences = options.ControlSequences

return Graphemes[[]byte]{iter: g, options: options}
}
67 changes: 67 additions & 0 deletions truncate_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package displaywidth

import "testing"

// csOptions is the Options value with ControlSequences enabled; the
// "*/ControlSequences" benchmark cases in this file run with it.
var csOptions = Options{ControlSequences: true}

// Inputs for benchmarking truncation with trailing escape sequence preservation.
// Every ANSI input ends with the reset sequence "\x1b[0m"; the benchmarks below
// truncate well before that point, so the reset lies after the cut.
var (
// Short colored text with reset: one leading escape sequence, then the
// text, then the reset.
shortANSI = "\x1b[31mhello world\x1b[0m"
// Multiple stacked SGR sequences: three escape sequences back to back
// before any visible text, then a single reset at the end.
stackedANSI = "\x1b[1m\x1b[31m\x1b[42mhello world, this is some longer text\x1b[0m"
// Many interleaved color changes: an escape sequence precedes nearly every
// word, so several sequences remain after the truncation point.
interleavedANSI = "hello \x1b[31mworld \x1b[32mfoo \x1b[33mbar \x1b[34mbaz \x1b[35mqux \x1b[36mend\x1b[0m"
// Plain text (no escape sequences) — baseline for comparing against the
// ANSI inputs above.
plainText = "hello world, this is some plain text without escapes"
)

// BenchmarkTruncateString measures Options.TruncateString over the plain and
// ANSI-decorated inputs declared in this file. Every case truncates to a
// maximum width of 5 with a "..." tail, runs as its own named sub-benchmark,
// and reports allocations.
func BenchmarkTruncateString(b *testing.B) {
	type benchCase struct {
		name    string
		input   string
		options Options
	}

	cases := []benchCase{
		{name: "plain/default", input: plainText, options: defaultOptions},
		{name: "plain/ControlSequences", input: plainText, options: csOptions},
		{name: "short_ANSI/default", input: shortANSI, options: defaultOptions},
		{name: "short_ANSI/ControlSequences", input: shortANSI, options: csOptions},
		{name: "stacked_ANSI/ControlSequences", input: stackedANSI, options: csOptions},
		{name: "interleaved_ANSI/ControlSequences", input: interleavedANSI, options: csOptions},
	}

	for idx := range cases {
		// Copy the case so the closure below holds its own value.
		tc := cases[idx]
		b.Run(tc.name, func(sub *testing.B) {
			sub.ReportAllocs()
			for iter := 0; iter < sub.N; iter++ {
				// The result is discarded; only time and allocations matter here.
				_ = tc.options.TruncateString(tc.input, 5, "...")
			}
		})
	}
}

// tail is the truncation suffix handed to TruncateBytes in
// BenchmarkTruncateBytes; it is built once here rather than on every call.
var tail = []byte("...")

// BenchmarkTruncateBytes is the []byte counterpart of BenchmarkTruncateString:
// it measures Options.TruncateBytes over the same plain and ANSI-decorated
// inputs, truncating each to a maximum width of 5 with the package-level tail.
// Every case runs as its own named sub-benchmark and reports allocations.
func BenchmarkTruncateBytes(b *testing.B) {
	type benchCase struct {
		name    string
		input   []byte
		options Options
	}

	cases := []benchCase{
		{name: "plain/default", input: []byte(plainText), options: defaultOptions},
		{name: "plain/ControlSequences", input: []byte(plainText), options: csOptions},
		{name: "short_ANSI/default", input: []byte(shortANSI), options: defaultOptions},
		{name: "short_ANSI/ControlSequences", input: []byte(shortANSI), options: csOptions},
		{name: "stacked_ANSI/ControlSequences", input: []byte(stackedANSI), options: csOptions},
		{name: "interleaved_ANSI/ControlSequences", input: []byte(interleavedANSI), options: csOptions},
	}

	for idx := range cases {
		// Copy the case so the closure below holds its own value.
		tc := cases[idx]
		b.Run(tc.name, func(sub *testing.B) {
			sub.ReportAllocs()
			for iter := 0; iter < sub.N; iter++ {
				// The result is discarded; only time and allocations matter here.
				_ = tc.options.TruncateBytes(tc.input, 5, tail)
			}
		})
	}
}
62 changes: 52 additions & 10 deletions width.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package displaywidth

import (
"strings"
"unicode/utf8"

"github.com/clipperhouse/uax29/v2/graphemes"
Expand All @@ -14,20 +15,20 @@ type Options struct {
// are treated as width 1. When true, they are width 2.
EastAsianWidth bool

// IgnoreControlSequences specifies whether to ignore ECMA-48 escape sequences
// ControlSequences specifies whether to ignore ECMA-48 escape sequences
// when calculating the display width. When false (default), ANSI escape
// sequences are treated as just a series of characters. When true, they are
// treated as a single zero-width unit.
//
// Note that this option is about *sequences*. Individual control characters
// are already treated as zero-width. With this option, ANSI sequences such as
// "\x1b[31m" and "\x1b[0m" do not count towards the width of a string.
IgnoreControlSequences bool
ControlSequences bool
}

// DefaultOptions is the default options for the display width
// calculation, which is EastAsianWidth false and IgnoreControlSequences false.
var DefaultOptions = Options{EastAsianWidth: false, IgnoreControlSequences: false}
// calculation, which is EastAsianWidth false and ControlSequences false.
var DefaultOptions = Options{EastAsianWidth: false, ControlSequences: false}

// String calculates the display width of a string,
// by iterating over grapheme clusters in the string
Expand All @@ -53,7 +54,7 @@ func (options Options) String(s string) int {

// Not ASCII, use grapheme parsing
g := graphemes.FromString(s[pos:])
g.AnsiEscapeSequences = options.IgnoreControlSequences
g.AnsiEscapeSequences = options.ControlSequences

start := pos

Expand Down Expand Up @@ -103,7 +104,7 @@ func (options Options) Bytes(s []byte) int {

// Not ASCII, use grapheme parsing
g := graphemes.FromBytes(s[pos:])
g.AnsiEscapeSequences = options.IgnoreControlSequences
g.AnsiEscapeSequences = options.ControlSequences

start := pos

Expand Down Expand Up @@ -168,14 +169,19 @@ const _Default property = 0
// TruncateString truncates a string to the given maxWidth, and appends the
// given tail if the string is truncated.
//
// It ensures the total width, including the width of the tail, is less than or
// It ensures the visible width, including the width of the tail, is less than or
// equal to maxWidth.
//
// When [Options.ControlSequences] is true, ANSI escape sequences that appear
// after the truncation point are preserved in the output. This ensures that
// escape sequences such as SGR resets are not lost, preventing color bleed
// in terminal output.
func (options Options) TruncateString(s string, maxWidth int, tail string) string {
maxWidthWithoutTail := maxWidth - options.String(tail)

var pos, total int
g := graphemes.FromString(s)
g.AnsiEscapeSequences = options.IgnoreControlSequences
g.AnsiEscapeSequences = options.ControlSequences

for g.Next() {
gw := graphemeWidth(g.Value(), options)
Expand All @@ -184,6 +190,22 @@ func (options Options) TruncateString(s string, maxWidth int, tail string) strin
}
total += gw
if total > maxWidth {
if options.ControlSequences {
// Build result with trailing ANSI escape sequences preserved
var b strings.Builder
b.Grow(len(s) + len(tail)) // at most original + tail
b.WriteString(s[:pos])
b.WriteString(tail)
rem := graphemes.FromString(s[pos:])
rem.AnsiEscapeSequences = true
for rem.Next() {
v := rem.Value()
if len(v) > 0 && v[0] == 0x1B {
b.WriteString(v)
}
}
return b.String()
}
return s[:pos] + tail
}
}
Expand All @@ -203,14 +225,19 @@ func TruncateString(s string, maxWidth int, tail string) string {
// TruncateBytes truncates a []byte to the given maxWidth, and appends the
// given tail if the []byte is truncated.
//
// It ensures the total width, including the width of the tail, is less than or
// It ensures the visible width, including the width of the tail, is less than or
// equal to maxWidth.
//
// When [Options.ControlSequences] is true, ANSI escape sequences that appear
// after the truncation point are preserved in the output. This ensures that
// escape sequences such as SGR resets are not lost, preventing color bleed
// in terminal output.
func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte {
maxWidthWithoutTail := maxWidth - options.Bytes(tail)

var pos, total int
g := graphemes.FromBytes(s)
g.AnsiEscapeSequences = options.IgnoreControlSequences
g.AnsiEscapeSequences = options.ControlSequences

for g.Next() {
gw := graphemeWidth(g.Value(), options)
Expand All @@ -219,6 +246,21 @@ func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte
}
total += gw
if total > maxWidth {
if options.ControlSequences {
// Build result with trailing ANSI escape sequences preserved
result := make([]byte, 0, len(s)+len(tail)) // at most original + tail
result = append(result, s[:pos]...)
result = append(result, tail...)
rem := graphemes.FromBytes(s[pos:])
rem.AnsiEscapeSequences = true
for rem.Next() {
v := rem.Value()
if len(v) > 0 && v[0] == 0x1B {
result = append(result, v...)
}
}
return result
}
result := make([]byte, 0, pos+len(tail))
result = append(result, s[:pos]...)
result = append(result, tail...)
Expand Down
Loading