]> git.feebdaed.xyz Git - 0xmirror/go.git/commitdiff
internal/trace: correctly handle GoUndetermined for GoroutineSummary
authorMichael Anthony Knyszek <mknyszek@google.com>
Wed, 10 Dec 2025 02:50:56 +0000 (02:50 +0000)
committerMichael Knyszek <mknyszek@google.com>
Thu, 11 Dec 2025 16:34:39 +0000 (08:34 -0800)
Currently the trace summarization incorrectly handles GoUndetermined by
treating it too much like GoNotExist. In particular, it should be
accumulating all the time since the start of the trace in a particular
bucket, but it doesn't, so that instead gets counted as "unknown time"
because the "creation time" is at the start of the trace.

This change fixes the problem by simply doing the accumulation. It's
very straightforward. It also side-steps some other inaccuracies, like
associating a goroutine that is being named with the current task. I
don't think this can ever actually happen in practice, but splitting up
the two cases, GoUndetermined and GoNotExist, fixes it.

Fixes #76716.

Change-Id: I3ac1557044f99c92bada2cb0e124b2192b1d6ebb
Reviewed-on: https://go-review.googlesource.com/c/go/+/728822
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Nick Ripley <nick.ripley@datadoghq.com>
src/internal/trace/summary.go
src/internal/trace/summary_test.go

index f31439feb8ff0b1efce565bc5470e2ccf144e232..68a924e6b9a89200d07fc1cbf63d79d23e281b98 100644 (file)
@@ -321,16 +321,47 @@ func (s *Summarizer) Event(ev *Event) {
                        // Handle transition out.
                        g := s.gs[id]
                        switch old {
-                       case GoUndetermined, GoNotExist:
+                       case GoUndetermined:
                                g = &GoroutineSummary{ID: id, goroutineSummary: &goroutineSummary{}}
+
+                               // The goroutine is being named for the first time.
                                // If we're coming out of GoUndetermined, then the creation time is the
                                // time of the last sync.
-                               if old == GoUndetermined {
-                                       g.CreationTime = s.syncTs
-                               } else {
-                                       g.CreationTime = ev.Time()
+                               g.CreationTime = s.syncTs
+                               g.lastRangeTime = make(map[string]Time)
+                               g.BlockTimeByReason = make(map[string]time.Duration)
+                               g.RangeTime = make(map[string]time.Duration)
+
+                               // Accumulate all the time we spent in new as if it was old.
+                               //
+                               // Note that we still handle "new" again below because the time until
+                               // the *next* event still needs to be accumulated (even though it's
+                               // probably going to come immediately after this event).
+                               stateTime := ev.Time().Sub(s.syncTs)
+                               switch new {
+                               case GoRunning:
+                                       g.ExecTime += stateTime
+                               case GoWaiting:
+                                       g.BlockTimeByReason[st.Reason] += stateTime
+                               case GoRunnable:
+                                       g.SchedWaitTime += stateTime
+                               case GoSyscall:
+                                       // For a syscall, we're not going to be 'naming' this P from afar.
+                                       // It's "executing" somewhere. If we've got a P, then that means
+                                       // we've had that P since the start, so this is regular syscall time.
+                                       // Otherwise, this is syscall block time (no P).
+                                       if ev.Proc() == NoProc {
+                                               g.SyscallBlockTime += stateTime
+                                       } else {
+                                               g.SyscallTime += stateTime
+                                       }
                                }
-                               // The goroutine is being created, or it's being named for the first time.
+                               s.gs[g.ID] = g
+                       case GoNotExist:
+                               g = &GoroutineSummary{ID: id, goroutineSummary: &goroutineSummary{}}
+
+                               // The goroutine is being created.
+                               g.CreationTime = ev.Time()
                                g.lastRangeTime = make(map[string]Time)
                                g.BlockTimeByReason = make(map[string]time.Duration)
                                g.RangeTime = make(map[string]time.Duration)
index c5f1910262cb2acbc9c793c54c787d17b2bcc652..74bffefd70844a6089d89115d74cdee388aab0b1 100644 (file)
@@ -17,6 +17,7 @@ func TestSummarizeGoroutinesTrace(t *testing.T) {
                hasSchedWaitTime    bool
                hasSyncBlockTime    bool
                hasGCMarkAssistTime bool
+               hasUnknownTime      bool
        )
 
        assertContainsGoroutine(t, summaries, "runtime.gcBgMarkWorker")
@@ -31,6 +32,7 @@ func TestSummarizeGoroutinesTrace(t *testing.T) {
                if dt, ok := summary.RangeTime["GC mark assist"]; ok && dt > 0 {
                        hasGCMarkAssistTime = true
                }
+               hasUnknownTime = hasUnknownTime || summary.UnknownTime() > 0
        }
        if !hasSchedWaitTime {
                t.Error("missing sched wait time")
@@ -41,6 +43,9 @@ func TestSummarizeGoroutinesTrace(t *testing.T) {
        if !hasGCMarkAssistTime {
                t.Error("missing GC mark assist time")
        }
+       if hasUnknownTime {
+               t.Error("has time that is unaccounted for")
+       }
 }
 
 func TestSummarizeGoroutinesRegionsTrace(t *testing.T) {