"testing"
)
-func TestNotInGoMetricCallback(t *testing.T) {
+func TestNotInGoMetric(t *testing.T) {
switch runtime.GOOS {
case "windows", "plan9":
t.Skip("unsupported on Windows and Plan9")
}
}
- // This test is run in a subprocess to prevent other tests from polluting the metrics
- // and because we need to make some cgo callbacks.
- output := runTestProg(t, "testprogcgo", "NotInGoMetricCallback")
- want := "OK\n"
- if output != want {
- t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
+ run := func(t *testing.T, name string) {
+ // This test is run in a subprocess to prevent other tests from polluting the metrics
+ // and because we need to make some cgo callbacks.
+ output := runTestProg(t, "testprogcgo", name)
+ want := "OK\n"
+ if output != want {
+ t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
+ }
}
+ t.Run("CgoCall", func(t *testing.T) {
+ run(t, "NotInGoMetricCgoCall")
+ })
+ t.Run("CgoCallback", func(t *testing.T) {
+ run(t, "NotInGoMetricCgoCallback")
+ })
+ t.Run("CgoCallAndCallback", func(t *testing.T) {
+ run(t, "NotInGoMetricCgoCallAndCallback")
+ })
}
// mp.curg is now a real goroutine.
casgstatus(mp.curg, _Gdeadextra, _Gsyscall)
sched.ngsys.Add(-1)
- // N.B. We do not update nGsyscallNoP, because isExtraInC threads are not
- // counted as real goroutines while they're in C.
+
+ // This is technically inaccurate, but we set isExtraInC to false above,
+ // and so we need to update addGSyscallNoP to keep the two pieces of state
+ // consistent (it's only updated when isExtraInC is false). More specifically,
+ // When we get to cgocallbackg and exitsyscall, we'll be looking for a P, and
+ // since isExtraInC is false, we will decrement this metric.
+ //
+ // The inaccuracy is thankfully transient: only until this thread can get a P.
+ // We're going into Go anyway, so it's okay to pretend we're a real goroutine now.
+ addGSyscallNoP(mp)
if !signal {
if trace.ok() {
if oldp != nil {
if thread, ok := setBlockOnExitSyscall(oldp); ok {
thread.takeP()
- addGSyscallNoP(thread.mp) // takeP does the opposite, but this is a net zero change.
+ decGSyscallNoP(getg().m) // We got a P for ourselves.
thread.resume()
return oldp
}
#include <pthread.h>
extern void Ready();
+extern void BlockForeverInGo();
static _Atomic int spinning;
static _Atomic int released;
atomic_store(&spinning, 0);
atomic_store(&released, 1);
}
+
+static void* enterGoThenWait(void* arg __attribute__ ((unused))) {
+ BlockForeverInGo();
+ return NULL;
+}
+
+static void WaitInGoInNewCThread() {
+ pthread_t tid;
+ pthread_create(&tid, NULL, enterGoThenWait, NULL);
+}
+
+static void SpinForever() {
+ atomic_fetch_add(&spinning, 1);
+ while(1) {};
+}
*/
import "C"
"os"
"runtime"
"runtime/metrics"
+ "sync/atomic"
)
func init() {
- register("NotInGoMetricCallback", NotInGoMetricCallback)
+ register("NotInGoMetricCgoCall", NotInGoMetricCgoCall)
+ register("NotInGoMetricCgoCallback", NotInGoMetricCgoCallback)
+ register("NotInGoMetricCgoCallAndCallback", NotInGoMetricCgoCallAndCallback)
}
-func NotInGoMetricCallback() {
+// NotInGoMetric just double-checks that N goroutines in cgo count as the metric reading N.
+func NotInGoMetricCgoCall() {
const N = 10
+
+ // Spin up the same number of goroutines that will all wait in a cgo call.
+ for range N {
+ go func() {
+ C.SpinForever()
+ }()
+ }
+
+ // Make sure we're all blocked and spinning.
+ for C.Spinning() < N {
+ }
+
+ // Read not-in-go before taking the Ps back.
s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}}
+ failed := false
+ metrics.Read(s)
+ if n := s[0].Value.Uint64(); n != N {
+ println("pre-STW: expected", N, "not-in-go goroutines, found", n)
+ }
+
+ // Do something that stops the world to take all the Ps back.
+ //
+ // This will force a re-accounting of some of the goroutines and
+ // re-checking not-in-go will help catch bugs.
+ runtime.ReadMemStats(&m)
+
+ // Read not-in-go.
+ metrics.Read(s)
+ if n := s[0].Value.Uint64(); n != N {
+ println("post-STW: expected", N, "not-in-go goroutines, found", n)
+ }
+
+ // Fail if we get a bad reading.
+ if failed {
+ os.Exit(2)
+ }
+ println("OK")
+}
+
+// NotInGoMetricCgoCallback tests that threads that called into Go, then returned
+// to C with *no* Go on the stack, are *not* counted as not-in-go in the
+// runtime/metrics package.
+func NotInGoMetricCgoCallback() {
+ const N = 10
// Create N new C threads that have called into Go at least once.
for range N {
}
// Read not-in-go.
+ s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}}
metrics.Read(s)
if n := s[0].Value.Uint64(); n != 0 {
println("expected 0 not-in-go goroutines, found", n)
func Ready() {
readyCh <- true
}
+
+// NotInGoMetricCgoCallAndCallback tests that threads that called into Go are not
+// keeping the count of not-in-go threads negative. Specifically, needm sets
+// isExtraInC to false, breaking some of the invariants behind the not-in-go
+// runtime/metrics metric, causing the underlying count to break if we don't
+// account for this. In go.dev/cl/726964 this amounts to nGsyscallNoP being negative.
+// Unfortunately the runtime/metrics package masks a negative nGsyscallNoP because
+// it can transiently go negative due to a race. Therefore, this test checks
+// the condition by making sure not-in-go is positive when we expect it to be.
+// That is, threads in a cgo callback are *not* cancelling out threads in a
+// regular cgo call.
+func NotInGoMetricCgoCallAndCallback() {
+ const N = 10
+
+ // Spin up some threads that will do a cgo callback and just wait in Go.
+ // These threads are the ones we're worried about having the incorrect
+ // accounting that skews the count later.
+ for range N {
+ C.WaitInGoInNewCThread()
+ }
+
+ // Spin up the same number of goroutines that will all wait in a cgo call.
+ for range N {
+ go func() {
+ C.SpinForever()
+ }()
+ }
+
+ // Make sure we're all blocked and spinning.
+ for C.Spinning() < N || blockedForever.Load() < N {
+ }
+
+ // Read not-in-go before taking the Ps back.
+ s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}}
+ failed := false
+ metrics.Read(s)
+ if n := s[0].Value.Uint64(); n != N {
+ println("pre-STW: expected", N, "not-in-go goroutines, found", n)
+ }
+
+ // Do something that stops the world to take all the Ps back.
+ //
+ // This will force a re-accounting of some of the goroutines and
+ // re-checking not-in-go will help catch bugs.
+ runtime.ReadMemStats(&m)
+
+ // Read not-in-go.
+ metrics.Read(s)
+ if n := s[0].Value.Uint64(); n != N {
+ println("post-STW: expected", N, "not-in-go goroutines, found", n)
+ }
+
+ // Fail if we get a bad reading.
+ if failed {
+ os.Exit(2)
+ }
+ println("OK")
+}
+
+var blockedForever atomic.Uint32
+
+//export BlockForeverInGo
+func BlockForeverInGo() {
+ blockedForever.Add(1)
+ select {}
+}