Make determining whether a code point represents a combining mark faster (#1719)

2026-03-25 18:07:07 +09:00 · 2020-06-12 09:40:00 +05:30
parent efb38b8636
commit 5ce26cca71
3 changed files with 29 additions and 9 deletions
--- a/2
+++ b/2
@@ -79,7 +79,7 @@ bench-compare:
 	for i in 1 2 3; do \
 		go test -bench=. ./internal/...; \
 	done > benchmark_results
-	benchstat benchmark_results_baseline benchmark_results
+	benchstat -alpha 0.15 benchmark_results_baseline benchmark_results
 clean:
 	rm -f micro
--- a/internal/util/unicode.go
+++ b/internal/util/unicode.go
@@ -16,6 +16,16 @@ import (
 // For rendering, micro will display the combining characters. It's not perfect
 // but it's pretty good.
 // minMark is the low bound of the first 16-bit range in unicode.Mark, i.e.
 // the smallest code point that the Mark table can contain.
 var minMark = rune(unicode.Mark.R16[0].Lo)
 // isMark reports whether r is in the unicode.Mark range table.
 // Runes below minMark are rejected with a single comparison, so the table
 // lookup only runs for code points that could actually be marks.
 func isMark(r rune) bool {
 	return r >= minMark && unicode.In(r, unicode.Mark)
 }
 // DecodeCharacter returns the next character from a slice of bytes.
 // A character is a rune along with any accompanying combining runes.
 func DecodeCharacter(b []byte) (rune, []rune, int) {
@@ -24,7 +34,7 @@ func DecodeCharacter(b []byte) (rune, []rune, int) {
 	c, s := utf8.DecodeRune(b)
 	var combc []rune
-	for unicode.In(c, unicode.Mark) {
+	for isMark(c) {
 		combc = append(combc, c)
 		size += s
@@ -43,7 +53,7 @@ func DecodeCharacterInString(str string) (rune, []rune, int) {
 	c, s := utf8.DecodeRuneInString(str)
 	var combc []rune
-	for unicode.In(c, unicode.Mark) {
+	for isMark(c) {
 		combc = append(combc, c)
 		size += s
@@ -61,7 +71,7 @@ func CharacterCount(b []byte) int {
 	for len(b) > 0 {
 		r, size := utf8.DecodeRune(b)
-		if !unicode.In(r, unicode.Mark) {
+		if !isMark(r) {
 			s++
 		}
@@ -77,7 +87,7 @@ func CharacterCountInString(str string) int {
 	s := 0
 	for _, r := range str {
-		if !unicode.In(r, unicode.Mark) {
+		if !isMark(r) {
 			s++
 		}
 	}
--- a/pkg/highlight/unicode.go
+++ b/pkg/highlight/unicode.go
@@ -5,6 +5,16 @@ import (
 	"unicode/utf8"
 )
 // minMark is the low bound of the first 16-bit range in unicode.Mark, i.e.
 // the smallest code point that the Mark table can contain.
 var minMark = rune(unicode.Mark.R16[0].Lo)
 // isMark reports whether r is in the unicode.Mark range table.
 // Runes below minMark are rejected with a single comparison, so the table
 // lookup only runs for code points that could actually be marks.
 func isMark(r rune) bool {
 	return r >= minMark && unicode.In(r, unicode.Mark)
 }
 // DecodeCharacter returns the next character from a slice of bytes.
 // A character is a rune along with any accompanying combining runes.
 func DecodeCharacter(b []byte) (rune, []rune, int) {
@@ -13,7 +23,7 @@ func DecodeCharacter(b []byte) (rune, []rune, int) {
 	c, s := utf8.DecodeRune(b)
 	var combc []rune
-	for unicode.In(c, unicode.Mark) {
+	for isMark(c) {
 		combc = append(combc, c)
 		size += s
@@ -32,7 +42,7 @@ func DecodeCharacterInString(str string) (rune, []rune, int) {
 	c, s := utf8.DecodeRuneInString(str)
 	var combc []rune
-	for unicode.In(c, unicode.Mark) {
+	for isMark(c) {
 		combc = append(combc, c)
 		size += s
@@ -50,7 +60,7 @@ func CharacterCount(b []byte) int {
 	for len(b) > 0 {
 		r, size := utf8.DecodeRune(b)
-		if !unicode.In(r, unicode.Mark) {
+		if !isMark(r) {
 			s++
 		}
@@ -66,7 +76,7 @@ func CharacterCountInString(str string) int {
 	s := 0
 	for _, r := range str {
-		if !unicode.In(r, unicode.Mark) {
+		if !isMark(r) {
 			s++
 		}
 	}