Commit 873bd47

runtime: flush mcaches lazily
Currently, all mcaches are flushed during STW mark termination as a root marking job. This is currently necessary because all spans must be out of these caches before sweeping begins to avoid races with allocation and to ensure the spans are in the state expected by sweeping. We do it as a root marking job because mcache flushing is somewhat expensive and O(GOMAXPROCS) and this parallelizes the work across the Ps. However, it's also the last remaining root marking job performed during mark termination.

This CL moves mcache flushing out of mark termination and performs it lazily. We keep track of the last sweepgen at which each mcache was flushed and, as each P is woken from STW, it observes that its mcache is out-of-date and flushes it.

This introduces a complication for spans cached in stale mcaches. These may now be observed by background or proportional sweeping or when attempting to add a finalizer, but they aren't in a stable state. For example, they are likely to be on the wrong mcentral list. To fix this, this CL extends the sweepgen protocol to also capture whether a span is cached and, if so, whether or not its cache is stale. This protocol blocks asynchronous sweeping from touching cached spans and makes it the responsibility of mcache flushing to sweep the flushed spans.

This eliminates the last mark termination root marking job, which means we can now eliminate that entire infrastructure.

Updates #26903. This implements lazy mcache flushing.

Change-Id: Iadda7aabe540b2026cffc5195da7be37d5b4125e
Reviewed-on: https://go-review.googlesource.com/c/134783
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
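The extended protocol is easiest to see as a lookup from a span's sweepgen to its state, mirroring the comment table this commit adds to mspan in src/runtime/mheap.go. Below is a minimal standalone sketch of that classification; spanState and the generation values are invented for illustration and are not runtime APIs.

package main

import "fmt"

// spanState classifies a span whose sweepgen is sg when the heap's
// sweepgen is hg, following the five states documented on mspan.
func spanState(sg, hg uint32) string {
	switch sg {
	case hg - 2:
		return "needs sweeping"
	case hg - 1:
		return "being swept"
	case hg:
		return "swept, ready to use"
	case hg + 1:
		return "cached before sweep began; stale, needs sweeping"
	case hg + 3:
		return "swept, then cached; cache is fresh"
	}
	return "invalid"
}

func main() {
	const hg = 10 // assumed heap sweepgen; it advances by 2 per GC
	for _, sg := range []uint32{hg - 2, hg - 1, hg, hg + 1, hg + 3} {
		fmt.Printf("s.sweepgen=%d: %s\n", sg, spanState(sg, hg))
	}
}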
1 parent 457c8f4 commit 873bd47

7 files changed (+118, -28 lines)

src/runtime/mcache.go (43 additions, 3 deletions)

@@ -4,7 +4,10 @@
 
 package runtime
 
-import "unsafe"
+import (
+	"runtime/internal/atomic"
+	"unsafe"
+)
 
 // Per-thread (in Go, per-P) cache for small objects.
 // No locking needed because it is per-thread (per-P).
@@ -42,6 +45,12 @@ type mcache struct {
 	local_largefree  uintptr                  // bytes freed for large objects (>maxsmallsize)
 	local_nlargefree uintptr                  // number of frees for large objects (>maxsmallsize)
 	local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
+
+	// flushGen indicates the sweepgen during which this mcache
+	// was last flushed. If flushGen != mheap_.sweepgen, the spans
+	// in this mcache are stale and need to be flushed so they
+	// can be swept. This is done in acquirep.
+	flushGen uint32
 }
 
 // A gclink is a node in a linked list of blocks, like mlink,
@@ -76,6 +85,7 @@ var emptymspan mspan
 func allocmcache() *mcache {
 	lock(&mheap_.lock)
 	c := (*mcache)(mheap_.cachealloc.alloc())
+	c.flushGen = mheap_.sweepgen
 	unlock(&mheap_.lock)
 	for i := range c.alloc {
 		c.alloc[i] = &emptymspan
@@ -113,9 +123,12 @@ func (c *mcache) refill(spc spanClass) {
 	if uintptr(s.allocCount) != s.nelems {
 		throw("refill of span with free space remaining")
 	}
-
 	if s != &emptymspan {
-		s.incache = false
+		// Mark this span as no longer cached.
+		if s.sweepgen != mheap_.sweepgen+3 {
+			throw("bad sweepgen in refill")
+		}
+		atomic.Store(&s.sweepgen, mheap_.sweepgen)
 	}
 
 	// Get a new cached span from the central lists.
@@ -128,6 +141,10 @@
 		throw("span has no free space")
 	}
 
+	// Indicate that this span is cached and prevent asynchronous
+	// sweeping in the next sweep phase.
+	s.sweepgen = mheap_.sweepgen + 3
+
 	c.alloc[spc] = s
 }
 
@@ -143,3 +160,26 @@ func (c *mcache) releaseAll() {
 	c.tiny = 0
 	c.tinyoffset = 0
 }
+
+// prepareForSweep flushes c if the system has entered a new sweep phase
+// since c was populated. This must happen between the sweep phase
+// starting and the first allocation from c.
+func (c *mcache) prepareForSweep() {
+	// Alternatively, instead of making sure we do this on every P
+	// between starting the world and allocating on that P, we
+	// could leave allocate-black on, allow allocation to continue
+	// as usual, use a ragged barrier at the beginning of sweep to
+	// ensure all cached spans are swept, and then disable
+	// allocate-black. However, with this approach it's difficult
+	// to avoid spilling mark bits into the *next* GC cycle.
+	sg := mheap_.sweepgen
+	if c.flushGen == sg {
+		return
+	} else if c.flushGen != sg-2 {
+		println("bad flushGen", c.flushGen, "in prepareForSweep; sweepgen", sg)
+		throw("bad flushGen")
+	}
+	c.releaseAll()
+	stackcache_clear(c)
+	atomic.Store(&c.flushGen, mheap_.sweepgen) // Synchronizes with gcStart
+}
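prepareForSweep is the lazy-flush hook: each mcache remembers the sweepgen at which it was last flushed and flushes itself the first time it is used after the sweepgen advances. A rough standalone model of that handshake follows; the cache type, globalGen, and the span strings are stand-ins invented for the sketch (the real code also clears stack caches and throws on an unexpected flushGen).

package main

import (
	"fmt"
	"sync/atomic"
)

// cache is a stand-in for mcache; flushGen mirrors mcache.flushGen
// and globalGen mirrors mheap_.sweepgen.
type cache struct {
	flushGen uint32
	spans    []string // stand-in for the cached spans
}

var globalGen uint32 = 2

// prepareForSweep flushes c the first time it is used after the
// global generation advances, mirroring (*mcache).prepareForSweep.
func (c *cache) prepareForSweep() {
	g := atomic.LoadUint32(&globalGen)
	if atomic.LoadUint32(&c.flushGen) == g {
		return // already flushed during this sweep phase
	}
	c.spans = c.spans[:0] // release cached spans so they can be swept
	atomic.StoreUint32(&c.flushGen, g)
}

func main() {
	c := &cache{flushGen: 2, spans: []string{"span A", "span B"}}
	atomic.AddUint32(&globalGen, 2) // a GC cycle completes
	c.prepareForSweep()             // lazy flush on first use
	fmt.Println("cached spans after flush:", len(c.spans)) // 0
}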

src/runtime/mcentral.go (40 additions, 15 deletions)

@@ -135,7 +135,6 @@ havespan:
 		// heap_live changed.
 		gcController.revise()
 	}
-	s.incache = true
 	freeByteBase := s.freeindex &^ (64 - 1)
 	whichByte := freeByteBase / 8
 	// Init alloc bits cache.
@@ -150,28 +149,54 @@
 
 // Return span from an MCache.
 func (c *mcentral) uncacheSpan(s *mspan) {
-	lock(&c.lock)
-
-	s.incache = false
-
 	if s.allocCount == 0 {
 		throw("uncaching span but s.allocCount == 0")
 	}
 
 	cap := int32((s.npages << _PageShift) / s.elemsize)
 	n := cap - int32(s.allocCount)
+
+	// cacheSpan updated alloc assuming all objects on s were
+	// going to be allocated. Adjust for any that weren't. We must
+	// do this before potentially sweeping the span.
 	if n > 0 {
+		atomic.Xadd64(&c.nmalloc, -int64(n))
+	}
+
+	sg := mheap_.sweepgen
+	stale := s.sweepgen == sg+1
+	if stale {
+		// Span was cached before sweep began. It's our
+		// responsibility to sweep it.
+		//
+		// Set sweepgen to indicate it's not cached but needs
+		// sweeping. sweep will set s.sweepgen to indicate s
+		// is swept.
+		s.sweepgen = sg - 1
+		s.sweep(true)
+		// sweep may have freed objects, so recompute n.
+		n = cap - int32(s.allocCount)
+	} else {
+		// Indicate that s is no longer cached.
+		s.sweepgen = sg
+	}
+
+	if n > 0 {
+		lock(&c.lock)
 		c.empty.remove(s)
 		c.nonempty.insert(s)
-		// mCentral_CacheSpan conservatively counted
-		// unallocated slots in heap_live. Undo this.
-		atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize))
-		// cacheSpan updated alloc assuming all objects on s
-		// were going to be allocated. Adjust for any that
-		// weren't.
-		atomic.Xadd64(&c.nmalloc, -int64(n))
+		if !stale {
+			// mCentral_CacheSpan conservatively counted
			// unallocated slots in heap_live. Undo this.
+			//
+			// If this span was cached before sweep, then
+			// heap_live was totally recomputed since
+			// caching this span, so we don't do this for
+			// stale spans.
+			atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize))
+		}
+		unlock(&c.lock)
 	}
-	unlock(&c.lock)
 }
 
 // freeSpan updates c and s after sweeping s.
@@ -183,13 +208,13 @@ func (c *mcentral) uncacheSpan(s *mspan) {
 // If preserve=true, it does not move s (the caller
 // must take care of it).
 func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool {
-	if s.incache {
+	if sg := mheap_.sweepgen; s.sweepgen == sg+1 || s.sweepgen == sg+3 {
 		throw("freeSpan given cached span")
 	}
 	s.needzero = 1
 
 	if preserve {
-		// preserve is set only when called from MCentral_CacheSpan above,
+		// preserve is set only when called from (un)cacheSpan above,
 		// the span must be in the empty list.
 		if !s.inList() {
 			throw("can't preserve unlinked span")

src/runtime/mgc.go (18 additions, 0 deletions)

@@ -1262,6 +1262,14 @@ func gcStart(trigger gcTrigger) {
 		traceGCStart()
 	}
 
+	// Check that all Ps have finished deferred mcache flushes.
+	for _, p := range allp {
+		if fg := atomic.Load(&p.mcache.flushGen); fg != mheap_.sweepgen {
+			println("runtime: p", p.id, "flushGen", fg, "!= sweepgen", mheap_.sweepgen)
+			throw("p mcache not flushed")
+		}
+	}
+
 	gcBgMarkStartWorkers()
 
 	gcResetMarkState()
@@ -1606,6 +1614,16 @@ func gcMarkTermination(nextTriggerRatio float64) {
 	// Free stack spans. This must be done between GC cycles.
 	systemstack(freeStackSpans)
 
+	// Ensure all mcaches are flushed. Each P will flush its own
+	// mcache before allocating, but idle Ps may not. Since this
+	// is necessary to sweep all spans, we need to ensure all
+	// mcaches are flushed before we start the next GC cycle.
+	systemstack(func() {
+		forEachP(func(_p_ *p) {
+			_p_.mcache.prepareForSweep()
+		})
+	})
+
 	// Print gctrace before dropping worldsema. As soon as we drop
 	// worldsema another cycle could start and smash the stats
 	// we're trying to print.

src/runtime/mgcmark.go (3 additions, 6 deletions)

@@ -52,11 +52,7 @@
 //
 //go:nowritebarrier
 func gcMarkRootPrepare() {
-	if gcphase == _GCmarktermination {
-		work.nFlushCacheRoots = int(gomaxprocs)
-	} else {
-		work.nFlushCacheRoots = 0
-	}
+	work.nFlushCacheRoots = 0
 
 	// Compute how many data and BSS root blocks there are.
 	nBlocks := func(bytes uintptr) int {
@@ -344,7 +340,8 @@ func markrootSpans(gcw *gcWork, shard int) {
 		if s.state != mSpanInUse {
 			continue
 		}
-		if !useCheckmark && s.sweepgen != sg {
+		// Check that this span was swept (it may be cached or uncached).
+		if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
 			// sweepgen was updated (+2) during non-checkmark GC pass
 			print("sweep ", s.sweepgen, " ", sg, "\n")
 			throw("gc: unswept span")

src/runtime/mgcsweep.go (7 additions, 2 deletions)

@@ -161,7 +161,8 @@ func (s *mspan) ensureSwept() {
 	}
 
 	sg := mheap_.sweepgen
-	if atomic.Load(&s.sweepgen) == sg {
+	spangen := atomic.Load(&s.sweepgen)
+	if spangen == sg || spangen == sg+3 {
 		return
 	}
 	// The caller must be sure that the span is a mSpanInUse span.
@@ -170,7 +171,11 @@
 		return
 	}
 	// unfortunate condition, and we don't have efficient means to wait
-	for atomic.Load(&s.sweepgen) != sg {
+	for {
+		spangen := atomic.Load(&s.sweepgen)
+		if spangen == sg || spangen == sg+3 {
+			break
+		}
 		osyield()
 	}
 }
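ensureSwept's wait loop changes accordingly: both sg (swept and uncached) and sg+3 (swept and then cached) now count as swept, so the spin must accept either value. A standalone sketch of that pattern, with runtime.Gosched standing in for osyield and assumed generation values:

package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
)

func main() {
	const sg = 10               // assumed heap sweepgen
	var spanGen uint32 = sg - 1 // span currently being swept

	// A concurrent "sweeper" finishes and publishes the swept state.
	go func() {
		atomic.StoreUint32(&spanGen, sg) // could also be sg+3 if re-cached
	}()

	// Wait-loop pattern from ensureSwept: accept either swept state.
	for {
		g := atomic.LoadUint32(&spanGen)
		if g == sg || g == sg+3 {
			break
		}
		runtime.Gosched() // stand-in for osyield()
	}
	fmt.Println("span observed swept; sweepgen =", atomic.LoadUint32(&spanGen))
}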

src/runtime/mheap.go (2 additions, 2 deletions)

@@ -317,14 +317,15 @@
 	// if sweepgen == h->sweepgen - 2, the span needs sweeping
 	// if sweepgen == h->sweepgen - 1, the span is currently being swept
 	// if sweepgen == h->sweepgen, the span is swept and ready to use
+	// if sweepgen == h->sweepgen + 1, the span was cached before sweep began and is still cached, and needs sweeping
+	// if sweepgen == h->sweepgen + 3, the span was swept and then cached and is still cached
 	// h->sweepgen is incremented by 2 after every GC
 
 	sweepgen    uint32
 	divMul      uint16     // for divide by elemsize - divMagic.mul
 	baseMask    uint16     // if non-0, elemsize is a power of 2, & this will get object allocation base
 	allocCount  uint16     // number of allocated objects
 	spanclass   spanClass  // size class and noscan (uint8)
-	incache     bool       // being used by an mcache
 	state       mSpanState // mspaninuse etc
 	needzero    uint8      // needs to be zeroed before allocation
 	divShift    uint8      // for divide by elemsize - divMagic.shift
@@ -1185,7 +1186,6 @@ func (span *mspan) init(base uintptr, npages uintptr) {
 	span.npages = npages
 	span.allocCount = 0
 	span.spanclass = 0
-	span.incache = false
 	span.elemsize = 0
 	span.state = mSpanDead
 	span.unusedsince = 0
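One property of this encoding is worth spelling out: because h->sweepgen advances by 2 after every GC, a span left cached across a cycle ages from the "swept and then cached" state (sg+3) into the "cached before sweep began, needs sweeping" state (sg+1) without anyone touching it. A few lines of assumed-value arithmetic to illustrate:

package main

import "fmt"

func main() {
	hg := uint32(10) // current h->sweepgen
	s := hg + 3      // span swept, then cached, this cycle
	hg += 2          // GC completes; h->sweepgen advances by 2
	// The span's sweepgen is untouched, but relative to the new
	// h->sweepgen it now reads as "cached before sweep began,
	// needs sweeping".
	fmt.Println(s == hg+1) // true
}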

src/runtime/proc.go (5 additions, 0 deletions)

@@ -4119,6 +4119,7 @@ func procresize(nprocs int32) *p {
 	if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
 		// continue to use the current P
 		_g_.m.p.ptr().status = _Prunning
+		_g_.m.p.ptr().mcache.prepareForSweep()
 	} else {
 		// release the current P and acquire allp[0]
 		if _g_.m.p != 0 {
@@ -4169,6 +4170,10 @@ func acquirep(_p_ *p) {
 	_g_ := getg()
 	_g_.m.mcache = _p_.mcache
 
+	// Perform deferred mcache flush before this P can allocate
+	// from a potentially stale mcache.
+	_p_.mcache.prepareForSweep()
+
 	if trace.enabled {
 		traceProcStart()
 	}
