Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

unicode/utf8: make DecodeRune{,InString} inlineable #75181

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
jub0bs wants to merge 3 commits into golang:master from jub0bs:make-decoderune-inlineable
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions src/bufio/bufio.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -311,10 +311,7 @@ func (b *Reader) ReadRune() (r rune, size int, err error) {
if b.r == b.w {
return 0, 0, b.readErr()
}
r, size = rune(b.buf[b.r]), 1
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRune(b.buf[b.r:b.w])
}
r, size = utf8.DecodeRune(b.buf[b.r:b.w])
b.r += size
b.lastByte = int(b.buf[b.r-1])
b.lastRuneSize = size
Expand Down
40 changes: 8 additions & 32 deletions src/bytes/bytes.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -528,11 +528,7 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
// more efficient, possibly due to cache effects.
start := -1 // valid span start if >= 0
for i := 0; i < len(s); {
size := 1
r := rune(s[i])
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRune(s[i:])
}
r, size := utf8.DecodeRune(s[i:])
if f(r) {
if start >= 0 {
spans = append(spans, span{start, i})
Expand Down Expand Up @@ -614,11 +610,7 @@ func Map(mapping func(r rune) rune, s []byte) []byte {
// fine. It could also shrink but that falls out naturally.
b := make([]byte, 0, len(s))
for i := 0; i < len(s); {
wid := 1
r := rune(s[i])
if r >= utf8.RuneSelf {
r, wid = utf8.DecodeRune(s[i:])
}
r, wid := utf8.DecodeRune(s[i:])
r = mapping(r)
if r >= 0 {
b = utf8.AppendRune(b, r)
Expand Down Expand Up @@ -917,11 +909,7 @@ func LastIndexFunc(s []byte, f func(r rune) bool) int {
func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
start := 0
for start < len(s) {
wid := 1
r := rune(s[start])
if r >= utf8.RuneSelf {
r, wid = utf8.DecodeRune(s[start:])
}
r, wid := utf8.DecodeRune(s[start:])
if f(r) == truth {
return start
}
Expand Down Expand Up @@ -1052,10 +1040,7 @@ func trimLeftASCII(s []byte, as *asciiSet) []byte {

func trimLeftUnicode(s []byte, cutset string) []byte {
for len(s) > 0 {
r, n := rune(s[0]), 1
if r >= utf8.RuneSelf {
r, n = utf8.DecodeRune(s)
}
r, n := utf8.DecodeRune(s)
if !containsRune(cutset, r) {
break
}
Expand Down Expand Up @@ -1258,19 +1243,10 @@ hasUnicode:
t = t[i:]
for len(s) != 0 && len(t) != 0 {
// Extract first rune from each.
var sr, tr rune
if s[0] < utf8.RuneSelf {
sr, s = rune(s[0]), s[1:]
} else {
r, size := utf8.DecodeRune(s)
sr, s = r, s[size:]
}
if t[0] < utf8.RuneSelf {
tr, t = rune(t[0]), t[1:]
} else {
r, size := utf8.DecodeRune(t)
tr, t = r, t[size:]
}
sr, size := utf8.DecodeRune(s)
s = s[size:]
tr, size := utf8.DecodeRune(t)
t = t[size:]

// If they match, keep going; if not, return false.

Expand Down
6 changes: 1 addition & 5 deletions src/bytes/iter.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,7 @@ func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] {
return func(yield func([]byte) bool) {
start := -1
for i := 0; i < len(s); {
size := 1
r := rune(s[i])
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRune(s[i:])
}
r, size := utf8.DecodeRune(s[i:])
if f(r) {
if start >= 0 {
if !yield(s[start:i:i]) {
Expand Down
2 changes: 2 additions & 0 deletions src/cmd/compile/internal/test/inl_test.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ func TestIntendedInlining(t *testing.T) {
"assemble64",
},
"unicode/utf8": {
"DecodeRune",
"DecodeRuneInString",
"FullRune",
"FullRuneInString",
"RuneLen",
Expand Down
4 changes: 0 additions & 4 deletions src/encoding/json/decode.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -1214,10 +1214,6 @@ func unquoteBytes(s []byte) (t []byte, ok bool) {
if c == '\\' || c == '"' || c < ' ' {
break
}
if c < utf8.RuneSelf {
r++
continue
}
rr, size := utf8.DecodeRune(s[r:])
if rr == utf8.RuneError && size == 1 {
break
Expand Down
5 changes: 1 addition & 4 deletions src/fmt/format.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,7 @@ func (f *fmt) truncate(b []byte) []byte {
if n < 0 {
return b[:i]
}
wid := 1
if b[i] >= utf8.RuneSelf {
_, wid = utf8.DecodeRune(b[i:])
}
_, wid := utf8.DecodeRune(b[i:])
i += wid
}
}
Expand Down
5 changes: 1 addition & 4 deletions src/fmt/print.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -1145,10 +1145,7 @@ formatLoop:
break
}

verb, size := rune(format[i]), 1
if verb >= utf8.RuneSelf {
verb, size = utf8.DecodeRuneInString(format[i:])
}
verb, size := utf8.DecodeRuneInString(format[i:])
i += size

switch {
Expand Down
28 changes: 4 additions & 24 deletions src/regexp/regexp.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -384,10 +384,6 @@ type inputString struct {

func (i *inputString) step(pos int) (rune, int) {
if pos < len(i.str) {
c := i.str[pos]
if c < utf8.RuneSelf {
return rune(c), 1
}
return utf8.DecodeRuneInString(i.str[pos:])
}
return endOfText, 0
Expand All @@ -409,17 +405,11 @@ func (i *inputString) context(pos int) lazyFlag {
r1, r2 := endOfText, endOfText
// 0 < pos && pos <= len(i.str)
if uint(pos-1) < uint(len(i.str)) {
r1 = rune(i.str[pos-1])
if r1 >= utf8.RuneSelf {
r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
}
r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
}
// 0 <= pos && pos < len(i.str)
if uint(pos) < uint(len(i.str)) {
r2 = rune(i.str[pos])
if r2 >= utf8.RuneSelf {
r2, _ = utf8.DecodeRuneInString(i.str[pos:])
}
r2, _ = utf8.DecodeRuneInString(i.str[pos:])
}
return newLazyFlag(r1, r2)
}
Expand All @@ -431,10 +421,6 @@ type inputBytes struct {

func (i *inputBytes) step(pos int) (rune, int) {
if pos < len(i.str) {
c := i.str[pos]
if c < utf8.RuneSelf {
return rune(c), 1
}
return utf8.DecodeRune(i.str[pos:])
}
return endOfText, 0
Expand All @@ -456,17 +442,11 @@ func (i *inputBytes) context(pos int) lazyFlag {
r1, r2 := endOfText, endOfText
// 0 < pos && pos <= len(i.str)
if uint(pos-1) < uint(len(i.str)) {
r1 = rune(i.str[pos-1])
if r1 >= utf8.RuneSelf {
r1, _ = utf8.DecodeLastRune(i.str[:pos])
}
r1, _ = utf8.DecodeLastRune(i.str[:pos])
}
// 0 <= pos && pos < len(i.str)
if uint(pos) < uint(len(i.str)) {
r2 = rune(i.str[pos])
if r2 >= utf8.RuneSelf {
r2, _ = utf8.DecodeRune(i.str[pos:])
}
r2, _ = utf8.DecodeRune(i.str[pos:])
}
return newLazyFlag(r1, r2)
}
Expand Down
8 changes: 2 additions & 6 deletions src/strconv/quote.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,8 @@ func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly b
buf = nBuf
}
buf = append(buf, quote)
for width := 0; len(s) > 0; s = s[width:] {
r := rune(s[0])
width = 1
if r >= utf8.RuneSelf {
r, width = utf8.DecodeRuneInString(s)
}
for r, width := rune(0), 0; len(s) > 0; s = s[width:] {
r, width = utf8.DecodeRuneInString(s)
if width == 1 && r == utf8.RuneError {
buf = append(buf, `\x`...)
buf = append(buf, lowerhex[s[0]>>4])
Expand Down
6 changes: 1 addition & 5 deletions src/strings/iter.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,7 @@ func FieldsFuncSeq(s string, f func(rune) bool) iter.Seq[string] {
return func(yield func(string) bool) {
start := -1
for i := 0; i < len(s); {
size := 1
r := rune(s[i])
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRuneInString(s[i:])
}
r, size := utf8.DecodeRuneInString(s[i:])
if f(r) {
if start >= 0 {
if !yield(s[start:i]) {
Expand Down
4 changes: 0 additions & 4 deletions src/strings/reader.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,6 @@ func (r *Reader) ReadRune() (ch rune, size int, err error) {
return 0, 0, io.EOF
}
r.prevRune = int(r.i)
if c := r.s[r.i]; c < utf8.RuneSelf {
r.i++
return rune(c), 1, nil
}
ch, size = utf8.DecodeRuneInString(r.s[r.i:])
r.i += int64(size)
return
Expand Down
16 changes: 4 additions & 12 deletions src/strings/strings.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -896,7 +896,7 @@ func TrimLeftFunc(s string, f func(rune) bool) string {
// Unicode code points c satisfying f(c) removed.
func TrimRightFunc(s string, f func(rune) bool) string {
i := lastIndexFunc(s, f, false)
if i >= 0 && s[i] >= utf8.RuneSelf {
if i >= 0 {
_, wid := utf8.DecodeRuneInString(s[i:])
i += wid
} else {
Expand Down Expand Up @@ -1028,10 +1028,7 @@ func trimLeftASCII(s string, as *asciiSet) string {

func trimLeftUnicode(s, cutset string) string {
for len(s) > 0 {
r, n := rune(s[0]), 1
if r >= utf8.RuneSelf {
r, n = utf8.DecodeRuneInString(s)
}
r, n := utf8.DecodeRuneInString(s)
if !ContainsRune(cutset, r) {
break
}
Expand Down Expand Up @@ -1229,13 +1226,8 @@ hasUnicode:
}

// Extract first rune from second string.
var tr rune
if t[0] < utf8.RuneSelf {
tr, t = rune(t[0]), t[1:]
} else {
r, size := utf8.DecodeRuneInString(t)
tr, t = r, t[size:]
}
tr, size := utf8.DecodeRuneInString(t)
t = t[size:]

// If they match, keep going; if not, return false.

Expand Down
26 changes: 26 additions & 0 deletions src/unicode/utf8/utf8.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,20 @@ func FullRuneInString(s string) bool {
// out of range, or is not the shortest possible UTF-8 encoding for the
// value. No other validation is performed.
func DecodeRune(p []byte) (r rune, size int) {
// Inlineable fast path for ASCII characters; see #48195.
// The range loop with an unconditional break inspects at most the
// first byte of p and runs zero times when p is empty, so no explicit
// length check or index expression is needed. This shape is unusual
// but effective at rendering the function inlineable.
for _, b := range p {
if b < RuneSelf {
// A first byte below RuneSelf is returned as a one-byte rune.
return rune(b), 1
}
break
}
// Empty input and inputs whose first byte is >= RuneSelf are handled
// out of line by decodeRuneSlow.
r, size = decodeRuneSlow(p)
return
}

func decodeRuneSlow(p []byte) (r rune, size int) {
n := len(p)
if n < 1 {
return RuneError, 0
Expand Down Expand Up @@ -203,6 +217,18 @@ func DecodeRune(p []byte) (r rune, size int) {
// out of range, or is not the shortest possible UTF-8 encoding for the
// value. No other validation is performed.
func DecodeRuneInString(s string) (r rune, size int) {
// Inlineable fast path for ASCII characters; see #48195.
// The if/else followed by a bare return is a bit unusual, but this
// shape is effective at rendering the function inlineable.
if s != "" && s[0] < RuneSelf {
// Non-empty s whose first byte is below RuneSelf: a one-byte rune.
return rune(s[0]), 1
} else {
// Empty s, or a first byte >= RuneSelf: decode out of line.
r, size = decodeRuneInStringSlow(s)
}
return
}

func decodeRuneInStringSlow(s string) (rune, int) {
n := len(s)
if n < 1 {
return RuneError, 0
Expand Down
27 changes: 23 additions & 4 deletions src/unicode/utf8/utf8_test.go
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -747,18 +747,37 @@ func BenchmarkAppendInvalidRuneNegative(b *testing.B) {

func BenchmarkDecodeASCIIRune(b *testing.B) {
a := []byte{'a'}
for i := 0; i < b.N; i++ {
DecodeRune(a)
for range b.N {
runeSink, sizeSink = DecodeRune(a)
}
}

func BenchmarkDecodeJapaneseRune(b *testing.B) {
nihon := []byte("本")
for i := 0; i < b.N; i++ {
DecodeRune(nihon)
for range b.N {
runeSink, sizeSink = DecodeRune(nihon)
}
}

func BenchmarkDecodeASCIIRuneInString(b *testing.B) {
a := "a"
for range b.N {
runeSink, sizeSink = DecodeRuneInString(a)
}
}

func BenchmarkDecodeJapaneseRuneInString(b *testing.B) {
nihon := "本"
for range b.N {
runeSink, sizeSink = DecodeRuneInString(nihon)
}
}

// runeSink and sizeSink are used to reference the return values of
// benchmarked decode functions to avoid dead code elimination.
var (
runeSink rune
sizeSink int
)

// boolSink is used to reference the return value of benchmarked
// functions to avoid dead code elimination.
var boolSink bool
Expand Down

AltStyle によって変換されたページ (->オリジナル) /