Skip to content

Commit 5af0b28

Browse files
committed
runtime: iterate over set bits in adjustpointers
There are several things combined in this change.

First, eliminate the gobitvector type in favor of adding a ptrbit method to bitvector. In non-performance-critical code, use that method. In performance-critical code, though, load the bitvector data one byte at a time and iterate only over set bits. To support that, add and use sys.Ctz8.

name                old time/op  new time/op  delta
StackCopyPtr-8      81.8ms ± 5%  78.9ms ± 3%  -3.58%  (p=0.000 n=97+96)
StackCopy-8         65.9ms ± 3%  62.8ms ± 3%  -4.67%  (p=0.000 n=96+92)
StackCopyNoCache-8   105ms ± 3%   102ms ± 3%  -3.38%  (p=0.000 n=96+95)

Change-Id: I00b80f45612708bd440b1a411a57fa6dfa24aa74
Reviewed-on: https://go-review.googlesource.com/109716
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
1 parent 13cd006 commit 5af0b28

File tree

7 files changed

+75
-46
lines changed

7 files changed

+75
-46
lines changed

src/cmd/compile/internal/gc/ssa.go

+2
Original file line numberDiff line numberDiff line change
@@ -2980,6 +2980,8 @@ func init() {
29802980
alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas", p4...)
29812981
alias("runtime/internal/atomic", "Casp1", "runtime/internal/atomic", "Cas64", p8...)
29822982

2983+
alias("runtime/internal/sys", "Ctz8", "math/bits", "TrailingZeros8", all...)
2984+
29832985
/******** math ********/
29842986
addF("math", "Sqrt",
29852987
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {

src/runtime/heapdump.go

+2-3
Original file line numberDiff line numberDiff line change
@@ -233,9 +233,8 @@ type childInfo struct {
233233

234234
// dump kinds & offsets of interesting fields in bv
235235
func dumpbv(cbv *bitvector, offset uintptr) {
236-
bv := gobv(*cbv)
237-
for i := uintptr(0); i < bv.n; i++ {
238-
if bv.bytedata[i/8]>>(i%8)&1 == 1 {
236+
for i := uintptr(0); i < uintptr(cbv.n); i++ {
237+
if cbv.ptrbit(i) == 1 {
239238
dumpint(fieldKindPtr)
240239
dumpint(uint64(offset + i*sys.PtrSize))
241240
}

src/runtime/internal/sys/intrinsics.go

+24
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,30 @@ func Ctz32(x uint32) int {
5050
return i + z
5151
}
5252

53+
// Ctz8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
54+
func Ctz8(x uint8) int {
55+
return int(ntz8tab[x])
56+
}
57+
58+
var ntz8tab = [256]uint8{
59+
0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
60+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
61+
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
62+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
63+
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
64+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
65+
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
66+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
67+
0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
68+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
69+
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
70+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
71+
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
72+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
73+
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
74+
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
75+
}
76+
5377
// Bswap64 returns its input with byte order reversed
5478
// 0x0102030405060708 -> 0x0807060504030201
5579
func Bswap64(x uint64) uint64 {

src/runtime/internal/sys/intrinsics_386.s

+8
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@ TEXT runtime∕internal∕sys·Ctz32(SB), NOSPLIT, $0-8
3434
MOVL AX, ret+4(FP)
3535
RET
3636

37+
TEXT runtime∕internal∕sys·Ctz8(SB), NOSPLIT, $0-8
38+
MOVBLZX x+0(FP), AX
39+
BSFL AX, AX
40+
JNZ 2(PC)
41+
MOVL $8, AX
42+
MOVL AX, ret+4(FP)
43+
RET
44+
3745
TEXT runtime∕internal∕sys·Bswap64(SB), NOSPLIT, $0-16
3846
MOVL x_lo+0(FP), AX
3947
MOVL x_hi+4(FP), BX

src/runtime/internal/sys/intrinsics_stubs.go

+1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@ package sys
88

99
func Ctz64(x uint64) int
1010
func Ctz32(x uint32) int
11+
func Ctz8(x uint8) int
1112
func Bswap64(x uint64) uint64
1213
func Bswap32(x uint32) uint32

src/runtime/mbitmap.go

+1-2
Original file line numberDiff line numberDiff line change
@@ -2021,9 +2021,8 @@ func getgcmask(ep interface{}) (mask []byte) {
20212021
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
20222022
mask = make([]byte, n/sys.PtrSize)
20232023
for i := uintptr(0); i < n; i += sys.PtrSize {
2024-
bitmap := bv.bytedata
20252024
off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize
2026-
mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
2025+
mask[i/sys.PtrSize] = bv.ptrbit(off)
20272026
}
20282027
}
20292028
return

src/runtime/stack.go

+37-41
Original file line numberDiff line numberDiff line change
@@ -544,64 +544,60 @@ type bitvector struct {
544544
bytedata *uint8
545545
}
546546

547-
type gobitvector struct {
548-
n uintptr
549-
bytedata []uint8
550-
}
551-
552-
func gobv(bv bitvector) gobitvector {
553-
return gobitvector{
554-
uintptr(bv.n),
555-
(*[1 << 30]byte)(unsafe.Pointer(bv.bytedata))[:(bv.n+7)/8],
556-
}
557-
}
558-
559-
func ptrbit(bv *gobitvector, i uintptr) uint8 {
560-
return (bv.bytedata[i/8] >> (i % 8)) & 1
547+
// ptrbit returns the i'th bit in bv.
548+
// ptrbit is less efficient than iterating directly over bitvector bits,
549+
// and should only be used in non-performance-critical code.
550+
// See adjustpointers for an example of a high-efficiency walk of a bitvector.
551+
func (bv *bitvector) ptrbit(i uintptr) uint8 {
552+
b := *(addb(bv.bytedata, i/8))
553+
return (b >> (i % 8)) & 1
561554
}
562555

563556
// bv describes the memory starting at address scanp.
564557
// Adjust any pointers contained therein.
565-
func adjustpointers(scanp unsafe.Pointer, cbv *bitvector, adjinfo *adjustinfo, f funcInfo) {
566-
bv := gobv(*cbv)
558+
func adjustpointers(scanp unsafe.Pointer, bv *bitvector, adjinfo *adjustinfo, f funcInfo) {
567559
minp := adjinfo.old.lo
568560
maxp := adjinfo.old.hi
569561
delta := adjinfo.delta
570-
num := bv.n
562+
num := uintptr(bv.n)
571563
// If this frame might contain channel receive slots, use CAS
572564
// to adjust pointers. If the slot hasn't been received into
573565
// yet, it may contain stack pointers and a concurrent send
574566
// could race with adjusting those pointers. (The sent value
575567
// itself can never contain stack pointers.)
576568
useCAS := uintptr(scanp) < adjinfo.sghi
577-
for i := uintptr(0); i < num; i++ {
569+
for i := uintptr(0); i < num; i += 8 {
578570
if stackDebug >= 4 {
579-
print(" ", add(scanp, i*sys.PtrSize), ":", ptrnames[ptrbit(&bv, i)], ":", hex(*(*uintptr)(add(scanp, i*sys.PtrSize))), " # ", i, " ", bv.bytedata[i/8], "\n")
580-
}
581-
if ptrbit(&bv, i) != 1 {
582-
continue
583-
}
584-
pp := (*uintptr)(add(scanp, i*sys.PtrSize))
585-
retry:
586-
p := *pp
587-
if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 {
588-
// Looks like a junk value in a pointer slot.
589-
// Live analysis wrong?
590-
getg().m.traceback = 2
591-
print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n")
592-
throw("invalid pointer found on stack")
571+
for j := uintptr(0); j < 8; j++ {
572+
print(" ", add(scanp, (i+j)*sys.PtrSize), ":", ptrnames[bv.ptrbit(i+j)], ":", hex(*(*uintptr)(add(scanp, (i+j)*sys.PtrSize))), " # ", i, " ", *addb(bv.bytedata, i/8), "\n")
573+
}
593574
}
594-
if minp <= p && p < maxp {
595-
if stackDebug >= 3 {
596-
print("adjust ptr ", hex(p), " ", funcname(f), "\n")
575+
b := *(addb(bv.bytedata, i/8))
576+
for b != 0 {
577+
j := uintptr(sys.Ctz8(b))
578+
b &= b - 1
579+
pp := (*uintptr)(add(scanp, (i+j)*sys.PtrSize))
580+
retry:
581+
p := *pp
582+
if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 {
583+
// Looks like a junk value in a pointer slot.
584+
// Live analysis wrong?
585+
getg().m.traceback = 2
586+
print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n")
587+
throw("invalid pointer found on stack")
597588
}
598-
if useCAS {
599-
ppu := (*unsafe.Pointer)(unsafe.Pointer(pp))
600-
if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) {
601-
goto retry
589+
if minp <= p && p < maxp {
590+
if stackDebug >= 3 {
591+
print("adjust ptr ", hex(p), " ", funcname(f), "\n")
592+
}
593+
if useCAS {
594+
ppu := (*unsafe.Pointer)(unsafe.Pointer(pp))
595+
if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) {
596+
goto retry
597+
}
598+
} else {
599+
*pp = p + delta
602600
}
603-
} else {
604-
*pp = p + delta
605601
}
606602
}
607603
}

0 commit comments

Comments
 (0)