From 5d04b5d1ddb5ad4c80af1598c94f155996624668 Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. Oberhumer" Date: Thu, 21 Jan 2021 08:35:34 +0100 Subject: [PATCH 1/4] test: remove unneeded internal function. --- runewidth_test.go | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/runewidth_test.go b/runewidth_test.go index 89c943f..39b5020 100644 --- a/runewidth_test.go +++ b/runewidth_test.go @@ -87,41 +87,6 @@ func isCompact(t *testing.T, ti *tableInfo) bool { return true } -// This is a utility function in case that a table has changed. -func printCompactTable(tbl table) { - counter := 0 - printEntry := func(first, last rune) { - if counter%3 == 0 { - fmt.Printf("\t") - } - fmt.Printf("{0x%04X, 0x%04X},", first, last) - if (counter+1)%3 == 0 { - fmt.Printf("\n") - } else { - fmt.Printf(" ") - } - counter++ - } - - sort.Sort(&tbl) // just in case - first := rune(-1) - for i := range tbl { - e := tbl[i] - if !checkInterval(e.first, e.last) { // sanity check - panic("invalid table") - } - if first < 0 { - first = e.first - } - if i+1 < len(tbl) && e.last+1 >= tbl[i+1].first { // can be combined into one entry - continue - } - printEntry(first, e.last) - first = -1 - } - fmt.Printf("\n\n") -} - func TestSorted(t *testing.T) { for _, ti := range tables { if !sort.IsSorted(&ti.tbl) { @@ -129,7 +94,6 @@ func TestSorted(t *testing.T) { } if !isCompact(t, &ti) { t.Errorf("table not compact: %s", ti.name) - //printCompactTable(ti.tbl) } } } From 729daa3aaae8caf67aa296664775fd19bf263c5a Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. Oberhumer" Date: Thu, 21 Jan 2021 08:49:36 +0100 Subject: [PATCH 2/4] test: introduce TestRuneWidthChecksums. This makes sure that future optimizations don't break functionality. 
--- runewidth_test.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/runewidth_test.go b/runewidth_test.go index 39b5020..512d64a 100644 --- a/runewidth_test.go +++ b/runewidth_test.go @@ -65,6 +65,31 @@ func TestTableChecksums(t *testing.T) { } } +func TestRuneWidthChecksums(t *testing.T) { + var testcases = []struct { + name string + eastAsianWidth bool + wantSHA string + }{ + {"ea-no", false, "88a092b8ab7cddcf189f30d96c4b0d747fdef52d1ee8bcb6de0adbe5ff2a9fe6"}, + {"ea-yes", true, "e4ecd64af7fcc27369a7f128a0e7fdab7940fb293ad772713d3db757c4592662"}, + } + + for _, testcase := range testcases { + c := NewCondition() + c.EastAsianWidth = testcase.eastAsianWidth + buf := make([]byte, utf8.MaxRune+1) + for r := rune(0); r <= utf8.MaxRune; r++ { + buf[r] = byte(c.RuneWidth(r)) + } + gotSHA := fmt.Sprintf("%x", sha256.Sum256(buf)) + if gotSHA != testcase.wantSHA { + t.Errorf("TestRuneWidthChecksums = %s,\n\tsha256 = %s want %s", + testcase.name, gotSHA, testcase.wantSHA) + } + } +} + func checkInterval(first, last rune) bool { return first >= 0 && first <= utf8.MaxRune && last >= 0 && last <= utf8.MaxRune && From 2da2dd79876180b6ce8cf2689e939d743b1ec631 Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. 
Oberhumer" Date: Thu, 21 Jan 2021 09:03:38 +0100 Subject: [PATCH 3/4] benchmark: add RuneWidth and String1Width functions --- benchmark_test.go | 73 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/benchmark_test.go b/benchmark_test.go index bffd78a..d472be4 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -7,6 +7,79 @@ import ( var benchSink int +// +// RuneWidth +// + +func benchRuneWidth(b *testing.B, eastAsianWidth bool, start, stop rune, want int) int { + n := 0 + got := -1 + c := NewCondition() + c.EastAsianWidth = eastAsianWidth + for i := 0; i < b.N; i++ { + got = n + for r := start; r < stop; r++ { + n += c.RuneWidth(r) + } + got = n - got + } + if want != 0 && got != want { // some extra checks + b.Errorf("got %d, want %d\n", got, want) + } + return n +} +func BenchmarkRuneWidthAll(b *testing.B) { + benchSink = benchRuneWidth(b, false, 0, utf8.MaxRune+1, 1293932) +} +func BenchmarkRuneWidth768(b *testing.B) { + benchSink = benchRuneWidth(b, false, 0, 0x300, 702) +} +func BenchmarkRuneWidthAllEastAsian(b *testing.B) { + benchSink = benchRuneWidth(b, true, 0, utf8.MaxRune+1, 1432558) +} +func BenchmarkRuneWidth768EastAsian(b *testing.B) { + benchSink = benchRuneWidth(b, true, 0, 0x300, 794) +} + +// +// String1Width - strings which consist of a single rune +// + +func benchString1Width(b *testing.B, eastAsianWidth bool, start, stop rune, want int) int { + n := 0 + got := -1 + c := NewCondition() + c.EastAsianWidth = eastAsianWidth + for i := 0; i < b.N; i++ { + got = n + for r := start; r < stop; r++ { + s := string(r) + n += c.StringWidth(s) + } + got = n - got + } + if want != 0 && got != want { // some extra checks + b.Errorf("got %d, want %d\n", got, want) + } + return n +} +func BenchmarkString1WidthAll(b *testing.B) { + benchSink = benchString1Width(b, false, 0, utf8.MaxRune+1, 1295980) +} +func BenchmarkString1Width768(b *testing.B) { + benchSink = benchString1Width(b, false, 0, 0x300, 702) +} 
+func BenchmarkString1WidthAllEastAsian(b *testing.B) { + benchSink = benchString1Width(b, true, 0, utf8.MaxRune+1, 1436654) +} +func BenchmarkString1Width768EastAsian(b *testing.B) { + benchSink = benchString1Width(b, true, 0, 0x300, 794) +} + +// +// tables +// + func benchTable(b *testing.B, tbl table) int { n := 0 for i := 0; i < b.N; i++ { From 242a4ff675f86b837202de58d1b6dfc95c915d47 Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. Oberhumer" Date: Thu, 21 Jan 2021 10:17:53 +0100 Subject: [PATCH 4/4] Optimize RuneWidth. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This results in a roughly 8x speedup for the common case RuneWidth768. name old time/op new time/op delta RuneWidth768-4 21.5µs ± 0% 2.7µs ± 0% -87.27% (p=0.029 n=4+4) RuneWidthAll-4 51.1ms ± 0% 45.3ms ± 0% -11.38% (p=0.029 n=4+4) String1Width768-4 219µs ± 2% 198µs ± 2% -9.73% (p=0.029 n=4+4) RuneWidth768EastAsian-4 35.6µs ± 0% 34.2µs ± 0% -3.86% (p=0.029 n=4+4) RuneWidthAllEastAsian-4 71.5ms ± 0% 69.1ms ± 0% -3.26% (p=0.029 n=4+4) String1WidthAll-4 356ms ± 1% 353ms ± 2% ~ (p=0.343 n=4+4) String1WidthAllEastAsian-4 378ms ± 1% 378ms ± 2% ~ (p=0.886 n=4+4) String1Width768EastAsian-4 236µs ± 1% 235µs ± 2% ~ (p=0.886 n=4+4) [Geo mean] 3.44ms 2.56ms -25.63% --- runewidth.go | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/runewidth.go b/runewidth.go index f3871a6..70038f9 100644 --- a/runewidth.go +++ b/runewidth.go @@ -96,13 +96,31 @@ func NewCondition() *Condition { // RuneWidth returns the number of cells in r. 
// See http://www.unicode.org/reports/tr11/ func (c *Condition) RuneWidth(r rune) int { - switch { - case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining, notassigned): - return 0 - case (c.EastAsianWidth && IsAmbiguousWidth(r)) || inTables(r, doublewidth): - return 2 - default: - return 1 + // optimized version, verified by TestRuneWidthChecksums() + if !c.EastAsianWidth { + switch { + case r < 0x20 || r > 0x10FFFF: + return 0 + case (r >= 0x7F && r <= 0x9F) || r == 0xAD: // nonprint + return 0 + case r < 0x300: + return 1 + case inTables(r, nonprint, combining, notassigned): + return 0 + case inTable(r, doublewidth): + return 2 + default: + return 1 + } + } else { + switch { + case r < 0x20 || r > 0x10FFFF || inTables(r, nonprint, combining, notassigned): + return 0 + case inTables(r, private, ambiguous, doublewidth): + return 2 + default: + return 1 + } } }