...

Source file src/internal/trace/order.go

Documentation: internal/trace

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package trace
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  
    11  	"internal/trace/event"
    12  	"internal/trace/event/go122"
    13  	"internal/trace/version"
    14  )
    15  
    16  // ordering emulates Go scheduler state for both validation and
    17  // for putting events in the right order.
    18  //
    19  // The interface to ordering consists of two methods: Advance
    20  // and Next. Advance is called to try and advance an event and
    21  // add completed events to the ordering. Next is used to pick
    22  // off events in the ordering.
    23  type ordering struct {
    24  	gStates     map[GoID]*gState
    25  	pStates     map[ProcID]*pState // TODO: The keys are dense, so this can be a slice.
    26  	mStates     map[ThreadID]*mState
    27  	activeTasks map[TaskID]taskState
    28  	gcSeq       uint64
    29  	gcState     gcState
    30  	initialGen  uint64
    31  	queue       queue[Event]
    32  }
    33  
    34  // Advance checks if it's valid to proceed with ev which came from thread m.
    35  //
    36  // It assumes the gen value passed to it is monotonically increasing across calls.
    37  //
    38  // If any error is returned, then the trace is broken and trace parsing must cease.
    39  // If it's not valid to advance with ev, but no error was encountered, the caller
    40  // should attempt to advance with other candidate events from other threads. If the
    41  // caller runs out of candidates, the trace is invalid.
    42  //
    43  // If this returns true, Next is guaranteed to return a complete event. However,
    44  // multiple events may be added to the ordering, so the caller should (but is not
    45  // required to) continue to call Next until it is exhausted.
    46  func (o *ordering) Advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64) (bool, error) {
    47  	if o.initialGen == 0 {
    48  		// Set the initial gen if necessary.
    49  		o.initialGen = gen
    50  	}
    51  
    52  	var curCtx, newCtx schedCtx
    53  	curCtx.M = m
    54  	newCtx.M = m
    55  
    56  	var ms *mState
    57  	if m == NoThread {
    58  		curCtx.P = NoProc
    59  		curCtx.G = NoGoroutine
    60  		newCtx = curCtx
    61  	} else {
    62  		// Pull out or create the mState for this event.
    63  		var ok bool
    64  		ms, ok = o.mStates[m]
    65  		if !ok {
    66  			ms = &mState{
    67  				g: NoGoroutine,
    68  				p: NoProc,
    69  			}
    70  			o.mStates[m] = ms
    71  		}
    72  		curCtx.P = ms.p
    73  		curCtx.G = ms.g
    74  		newCtx = curCtx
    75  	}
    76  
    77  	f := orderingDispatch[ev.typ]
    78  	if f == nil {
    79  		return false, fmt.Errorf("bad event type found while ordering: %v", ev.typ)
    80  	}
    81  	newCtx, ok, err := f(o, ev, evt, m, gen, curCtx)
    82  	if err == nil && ok && ms != nil {
    83  		// Update the mState for this event.
    84  		ms.p = newCtx.P
    85  		ms.g = newCtx.G
    86  	}
    87  	return ok, err
    88  }
    89  
    90  type orderingHandleFunc func(o *ordering, ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error)
    91  
    92  var orderingDispatch = [256]orderingHandleFunc{
    93  	// Procs.
    94  	go122.EvProcsChange: (*ordering).advanceAnnotation,
    95  	go122.EvProcStart:   (*ordering).advanceProcStart,
    96  	go122.EvProcStop:    (*ordering).advanceProcStop,
    97  	go122.EvProcSteal:   (*ordering).advanceProcSteal,
    98  	go122.EvProcStatus:  (*ordering).advanceProcStatus,
    99  
   100  	// Goroutines.
   101  	go122.EvGoCreate:            (*ordering).advanceGoCreate,
   102  	go122.EvGoCreateSyscall:     (*ordering).advanceGoCreateSyscall,
   103  	go122.EvGoStart:             (*ordering).advanceGoStart,
   104  	go122.EvGoDestroy:           (*ordering).advanceGoStopExec,
   105  	go122.EvGoDestroySyscall:    (*ordering).advanceGoDestroySyscall,
   106  	go122.EvGoStop:              (*ordering).advanceGoStopExec,
   107  	go122.EvGoBlock:             (*ordering).advanceGoStopExec,
   108  	go122.EvGoUnblock:           (*ordering).advanceGoUnblock,
   109  	go122.EvGoSyscallBegin:      (*ordering).advanceGoSyscallBegin,
   110  	go122.EvGoSyscallEnd:        (*ordering).advanceGoSyscallEnd,
   111  	go122.EvGoSyscallEndBlocked: (*ordering).advanceGoSyscallEndBlocked,
   112  	go122.EvGoStatus:            (*ordering).advanceGoStatus,
   113  
   114  	// STW.
   115  	go122.EvSTWBegin: (*ordering).advanceGoRangeBegin,
   116  	go122.EvSTWEnd:   (*ordering).advanceGoRangeEnd,
   117  
   118  	// GC events.
   119  	go122.EvGCActive:           (*ordering).advanceGCActive,
   120  	go122.EvGCBegin:            (*ordering).advanceGCBegin,
   121  	go122.EvGCEnd:              (*ordering).advanceGCEnd,
   122  	go122.EvGCSweepActive:      (*ordering).advanceGCSweepActive,
   123  	go122.EvGCSweepBegin:       (*ordering).advanceGCSweepBegin,
   124  	go122.EvGCSweepEnd:         (*ordering).advanceGCSweepEnd,
   125  	go122.EvGCMarkAssistActive: (*ordering).advanceGoRangeActive,
   126  	go122.EvGCMarkAssistBegin:  (*ordering).advanceGoRangeBegin,
   127  	go122.EvGCMarkAssistEnd:    (*ordering).advanceGoRangeEnd,
   128  	go122.EvHeapAlloc:          (*ordering).advanceHeapMetric,
   129  	go122.EvHeapGoal:           (*ordering).advanceHeapMetric,
   130  
   131  	// Annotations.
   132  	go122.EvGoLabel:         (*ordering).advanceAnnotation,
   133  	go122.EvUserTaskBegin:   (*ordering).advanceUserTaskBegin,
   134  	go122.EvUserTaskEnd:     (*ordering).advanceUserTaskEnd,
   135  	go122.EvUserRegionBegin: (*ordering).advanceUserRegionBegin,
   136  	go122.EvUserRegionEnd:   (*ordering).advanceUserRegionEnd,
   137  	go122.EvUserLog:         (*ordering).advanceAnnotation,
   138  
   139  	// Coroutines. Added in Go 1.23.
   140  	go122.EvGoSwitch:        (*ordering).advanceGoSwitch,
   141  	go122.EvGoSwitchDestroy: (*ordering).advanceGoSwitch,
   142  	go122.EvGoCreateBlocked: (*ordering).advanceGoCreate,
   143  
   144  	// GoStatus event with a stack. Added in Go 1.23.
   145  	go122.EvGoStatusStack: (*ordering).advanceGoStatus,
   146  
   147  	// Experimental events.
   148  
   149  	// Experimental heap span events. Added in Go 1.23.
   150  	go122.EvSpan:      (*ordering).advanceAllocFree,
   151  	go122.EvSpanAlloc: (*ordering).advanceAllocFree,
   152  	go122.EvSpanFree:  (*ordering).advanceAllocFree,
   153  
   154  	// Experimental heap object events. Added in Go 1.23.
   155  	go122.EvHeapObject:      (*ordering).advanceAllocFree,
   156  	go122.EvHeapObjectAlloc: (*ordering).advanceAllocFree,
   157  	go122.EvHeapObjectFree:  (*ordering).advanceAllocFree,
   158  
   159  	// Experimental goroutine stack events. Added in Go 1.23.
   160  	go122.EvGoroutineStack:      (*ordering).advanceAllocFree,
   161  	go122.EvGoroutineStackAlloc: (*ordering).advanceAllocFree,
   162  	go122.EvGoroutineStackFree:  (*ordering).advanceAllocFree,
   163  }
   164  
   165  func (o *ordering) advanceProcStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   166  	pid := ProcID(ev.args[0])
   167  	status := go122.ProcStatus(ev.args[1])
   168  	if int(status) >= len(go122ProcStatus2ProcState) {
   169  		return curCtx, false, fmt.Errorf("invalid status for proc %d: %d", pid, status)
   170  	}
   171  	oldState := go122ProcStatus2ProcState[status]
   172  	if s, ok := o.pStates[pid]; ok {
   173  		if status == go122.ProcSyscallAbandoned && s.status == go122.ProcSyscall {
   174  			// ProcSyscallAbandoned is a special case of ProcSyscall. It indicates a
   175  			// potential loss of information, but if we're already in ProcSyscall,
   176  			// we haven't lost the relevant information. Promote the status and advance.
   177  			oldState = ProcRunning
   178  			ev.args[1] = uint64(go122.ProcSyscall)
   179  		} else if status == go122.ProcSyscallAbandoned && s.status == go122.ProcSyscallAbandoned {
   180  			// If we're passing through ProcSyscallAbandoned, then there's no promotion
   181  			// to do. We've lost the M that this P is associated with. However it got there,
   182  			// it's going to appear as idle in the API, so pass through as idle.
   183  			oldState = ProcIdle
   184  			ev.args[1] = uint64(go122.ProcSyscallAbandoned)
   185  		} else if s.status != status {
   186  			return curCtx, false, fmt.Errorf("inconsistent status for proc %d: old %v vs. new %v", pid, s.status, status)
   187  		}
   188  		s.seq = makeSeq(gen, 0) // Reset seq.
   189  	} else {
   190  		o.pStates[pid] = &pState{id: pid, status: status, seq: makeSeq(gen, 0)}
   191  		if gen == o.initialGen {
   192  			oldState = ProcUndetermined
   193  		} else {
   194  			oldState = ProcNotExist
   195  		}
   196  	}
   197  	ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition.
   198  
   199  	// Bind the proc to the new context, if it's running.
   200  	newCtx := curCtx
   201  	if status == go122.ProcRunning || status == go122.ProcSyscall {
   202  		newCtx.P = pid
   203  	}
   204  	// If we're advancing through ProcSyscallAbandoned *but* oldState is running then we've
   205  	// promoted it to ProcSyscall. However, because it's ProcSyscallAbandoned, we know this
   206  	// P is about to get stolen and its status very likely isn't being emitted by the same
   207  	// thread it was bound to. Since this status is Running -> Running and Running is binding,
   208  	// we need to make sure we emit it in the right context: the context to which it is bound.
   209  	// Find it, and set our current context to it.
   210  	if status == go122.ProcSyscallAbandoned && oldState == ProcRunning {
   211  		// N.B. This is slow but it should be fairly rare.
   212  		found := false
   213  		for mid, ms := range o.mStates {
   214  			if ms.p == pid {
   215  				curCtx.M = mid
   216  				curCtx.P = pid
   217  				curCtx.G = ms.g
   218  				found = true
   219  			}
   220  		}
   221  		if !found {
   222  			return curCtx, false, fmt.Errorf("failed to find sched context for proc %d that's about to be stolen", pid)
   223  		}
   224  	}
   225  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   226  	return newCtx, true, nil
   227  }
   228  
   229  func (o *ordering) advanceProcStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   230  	pid := ProcID(ev.args[0])
   231  	seq := makeSeq(gen, ev.args[1])
   232  
   233  	// Try to advance. We might fail here due to sequencing, because the P hasn't
   234  	// had a status emitted, or because we already have a P and we're in a syscall,
   235  	// and we haven't observed that it was stolen from us yet.
   236  	state, ok := o.pStates[pid]
   237  	if !ok || state.status != go122.ProcIdle || !seq.succeeds(state.seq) || curCtx.P != NoProc {
   238  		// We can't make an inference as to whether this is bad. We could just be seeing
   239  		// a ProcStart on a different M before the proc's state was emitted, or before we
   240  		// got to the right point in the trace.
   241  		//
   242  		// Note that we also don't advance here if we have a P and we're in a syscall.
   243  		return curCtx, false, nil
   244  	}
   245  	// We can advance this P. Check some invariants.
   246  	//
   247  	// We might have a goroutine if a goroutine is exiting a syscall.
   248  	reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustNotHave, Goroutine: event.MayHave}
   249  	if err := validateCtx(curCtx, reqs); err != nil {
   250  		return curCtx, false, err
   251  	}
   252  	state.status = go122.ProcRunning
   253  	state.seq = seq
   254  	newCtx := curCtx
   255  	newCtx.P = pid
   256  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   257  	return newCtx, true, nil
   258  }
   259  
   260  func (o *ordering) advanceProcStop(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   261  	// We must be able to advance this P.
   262  	//
   263  	// There are 2 ways a P can stop: ProcStop and ProcSteal. ProcStop is used when the P
   264  	// is stopped by the same M that started it, while ProcSteal is used when another M
   265  	// steals the P by stopping it from a distance.
   266  	//
   267  	// Since a P is bound to an M, and we're stopping on the same M we started, it must
   268  	// always be possible to advance the current M's P from a ProcStop. This is also why
   269  	// ProcStop doesn't need a sequence number.
   270  	state, ok := o.pStates[curCtx.P]
   271  	if !ok {
   272  		return curCtx, false, fmt.Errorf("event %s for proc (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.P)
   273  	}
   274  	if state.status != go122.ProcRunning && state.status != go122.ProcSyscall {
   275  		return curCtx, false, fmt.Errorf("%s event for proc that's not %s or %s", go122.EventString(ev.typ), go122.ProcRunning, go122.ProcSyscall)
   276  	}
   277  	reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}
   278  	if err := validateCtx(curCtx, reqs); err != nil {
   279  		return curCtx, false, err
   280  	}
   281  	state.status = go122.ProcIdle
   282  	newCtx := curCtx
   283  	newCtx.P = NoProc
   284  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   285  	return newCtx, true, nil
   286  }
   287  
   288  func (o *ordering) advanceProcSteal(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   289  	pid := ProcID(ev.args[0])
   290  	seq := makeSeq(gen, ev.args[1])
   291  	state, ok := o.pStates[pid]
   292  	if !ok || (state.status != go122.ProcSyscall && state.status != go122.ProcSyscallAbandoned) || !seq.succeeds(state.seq) {
   293  		// We can't make an inference as to whether this is bad. We could just be seeing
   294  		// a ProcStart on a different M before the proc's state was emitted, or before we
   295  		// got to the right point in the trace.
   296  		return curCtx, false, nil
   297  	}
   298  	// We can advance this P. Check some invariants.
   299  	reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MayHave}
   300  	if err := validateCtx(curCtx, reqs); err != nil {
   301  		return curCtx, false, err
   302  	}
   303  	// Smuggle in the P state that let us advance so we can surface information to the event.
   304  	// Specifically, we need to make sure that the event is interpreted not as a transition of
   305  	// ProcRunning -> ProcIdle but ProcIdle -> ProcIdle instead.
   306  	//
   307  	// ProcRunning is binding, but we may be running with a P on the current M and we can't
   308  	// bind another P. This P is about to go ProcIdle anyway.
   309  	oldStatus := state.status
   310  	ev.extra(version.Go122)[0] = uint64(oldStatus)
   311  
   312  	// Update the P's status and sequence number.
   313  	state.status = go122.ProcIdle
   314  	state.seq = seq
   315  
   316  	// If we've lost information then don't try to do anything with the M.
   317  	// It may have moved on and we can't be sure.
   318  	if oldStatus == go122.ProcSyscallAbandoned {
   319  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   320  		return curCtx, true, nil
   321  	}
   322  
   323  	// Validate that the M we're stealing from is what we expect.
   324  	mid := ThreadID(ev.args[2]) // The M we're stealing from.
   325  
   326  	newCtx := curCtx
   327  	if mid == curCtx.M {
   328  		// We're stealing from ourselves. This behaves like a ProcStop.
   329  		if curCtx.P != pid {
   330  			return curCtx, false, fmt.Errorf("tried to self-steal proc %d (thread %d), but got proc %d instead", pid, mid, curCtx.P)
   331  		}
   332  		newCtx.P = NoProc
   333  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   334  		return newCtx, true, nil
   335  	}
   336  
   337  	// We're stealing from some other M.
   338  	mState, ok := o.mStates[mid]
   339  	if !ok {
   340  		return curCtx, false, fmt.Errorf("stole proc from non-existent thread %d", mid)
   341  	}
   342  
   343  	// Make sure we're actually stealing the right P.
   344  	if mState.p != pid {
   345  		return curCtx, false, fmt.Errorf("tried to steal proc %d from thread %d, but got proc %d instead", pid, mid, mState.p)
   346  	}
   347  
   348  	// Tell the M it has no P so it can proceed.
   349  	//
   350  	// This is safe because we know the P was in a syscall and
   351  	// the other M must be trying to get out of the syscall.
   352  	// GoSyscallEndBlocked cannot advance until the corresponding
   353  	// M loses its P.
   354  	mState.p = NoProc
   355  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   356  	return newCtx, true, nil
   357  }
   358  
   359  func (o *ordering) advanceGoStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   360  	gid := GoID(ev.args[0])
   361  	mid := ThreadID(ev.args[1])
   362  	status := go122.GoStatus(ev.args[2])
   363  
   364  	if int(status) >= len(go122GoStatus2GoState) {
   365  		return curCtx, false, fmt.Errorf("invalid status for goroutine %d: %d", gid, status)
   366  	}
   367  	oldState := go122GoStatus2GoState[status]
   368  	if s, ok := o.gStates[gid]; ok {
   369  		if s.status != status {
   370  			return curCtx, false, fmt.Errorf("inconsistent status for goroutine %d: old %v vs. new %v", gid, s.status, status)
   371  		}
   372  		s.seq = makeSeq(gen, 0) // Reset seq.
   373  	} else if gen == o.initialGen {
   374  		// Set the state.
   375  		o.gStates[gid] = &gState{id: gid, status: status, seq: makeSeq(gen, 0)}
   376  		oldState = GoUndetermined
   377  	} else {
   378  		return curCtx, false, fmt.Errorf("found goroutine status for new goroutine after the first generation: id=%v status=%v", gid, status)
   379  	}
   380  	ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition.
   381  
   382  	newCtx := curCtx
   383  	switch status {
   384  	case go122.GoRunning:
   385  		// Bind the goroutine to the new context, since it's running.
   386  		newCtx.G = gid
   387  	case go122.GoSyscall:
   388  		if mid == NoThread {
   389  			return curCtx, false, fmt.Errorf("found goroutine %d in syscall without a thread", gid)
   390  		}
   391  		// Is the syscall on this thread? If so, bind it to the context.
   392  		// Otherwise, we're talking about a G sitting in a syscall on an M.
   393  		// Validate the named M.
   394  		if mid == curCtx.M {
   395  			if gen != o.initialGen && curCtx.G != gid {
   396  				// If this isn't the first generation, we *must* have seen this
   397  				// binding occur already. Even if the G was blocked in a syscall
   398  				// for multiple generations since trace start, we would have seen
   399  				// a previous GoStatus event that bound the goroutine to an M.
   400  				return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, curCtx.G)
   401  			}
   402  			newCtx.G = gid
   403  			break
   404  		}
   405  		// Now we're talking about a thread and goroutine that have been
   406  		// blocked on a syscall for the entire generation. This case must
   407  		// not have a P; the runtime makes sure that all Ps are traced at
   408  		// the beginning of a generation, which involves taking a P back
   409  		// from every thread.
   410  		ms, ok := o.mStates[mid]
   411  		if ok {
   412  			// This M has been seen. That means we must have seen this
   413  			// goroutine go into a syscall on this thread at some point.
   414  			if ms.g != gid {
   415  				// But the G on the M doesn't match. Something's wrong.
   416  				return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, ms.g)
   417  			}
   418  			// This case is just a Syscall->Syscall event, which needs to
   419  			// appear as having the G currently bound to this M.
   420  			curCtx.G = ms.g
   421  		} else if !ok {
   422  			// The M hasn't been seen yet. That means this goroutine
   423  			// has just been sitting in a syscall on this M. Create
   424  			// a state for it.
   425  			o.mStates[mid] = &mState{g: gid, p: NoProc}
   426  			// Don't set curCtx.G in this case because this event is the
   427  			// binding event (and curCtx represents the "before" state).
   428  		}
   429  		// Update the current context to the M we're talking about.
   430  		curCtx.M = mid
   431  	}
   432  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   433  	return newCtx, true, nil
   434  }
   435  
   436  func (o *ordering) advanceGoCreate(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   437  	// Goroutines must be created on a running P, but may or may not be created
   438  	// by a running goroutine.
   439  	reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}
   440  	if err := validateCtx(curCtx, reqs); err != nil {
   441  		return curCtx, false, err
   442  	}
   443  	// If we have a goroutine, it must be running.
   444  	if state, ok := o.gStates[curCtx.G]; ok && state.status != go122.GoRunning {
   445  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
   446  	}
   447  	// This goroutine created another. Add a state for it.
   448  	newgid := GoID(ev.args[0])
   449  	if _, ok := o.gStates[newgid]; ok {
   450  		return curCtx, false, fmt.Errorf("tried to create goroutine (%v) that already exists", newgid)
   451  	}
   452  	status := go122.GoRunnable
   453  	if ev.typ == go122.EvGoCreateBlocked {
   454  		status = go122.GoWaiting
   455  	}
   456  	o.gStates[newgid] = &gState{id: newgid, status: status, seq: makeSeq(gen, 0)}
   457  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   458  	return curCtx, true, nil
   459  }
   460  
   461  func (o *ordering) advanceGoStopExec(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   462  	// These are goroutine events that all require an active running
   463  	// goroutine on some thread. They must *always* be advance-able,
   464  	// since running goroutines are bound to their M.
   465  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   466  		return curCtx, false, err
   467  	}
   468  	state, ok := o.gStates[curCtx.G]
   469  	if !ok {
   470  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
   471  	}
   472  	if state.status != go122.GoRunning {
   473  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
   474  	}
   475  	// Handle each case slightly differently; we just group them together
   476  	// because they have shared preconditions.
   477  	newCtx := curCtx
   478  	switch ev.typ {
   479  	case go122.EvGoDestroy:
   480  		// This goroutine is exiting itself.
   481  		delete(o.gStates, curCtx.G)
   482  		newCtx.G = NoGoroutine
   483  	case go122.EvGoStop:
   484  		// Goroutine stopped (yielded). It's runnable but not running on this M.
   485  		state.status = go122.GoRunnable
   486  		newCtx.G = NoGoroutine
   487  	case go122.EvGoBlock:
   488  		// Goroutine blocked. It's waiting now and not running on this M.
   489  		state.status = go122.GoWaiting
   490  		newCtx.G = NoGoroutine
   491  	}
   492  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   493  	return newCtx, true, nil
   494  }
   495  
   496  func (o *ordering) advanceGoStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   497  	gid := GoID(ev.args[0])
   498  	seq := makeSeq(gen, ev.args[1])
   499  	state, ok := o.gStates[gid]
   500  	if !ok || state.status != go122.GoRunnable || !seq.succeeds(state.seq) {
   501  		// We can't make an inference as to whether this is bad. We could just be seeing
   502  		// a GoStart on a different M before the goroutine was created, before it had its
   503  		// state emitted, or before we got to the right point in the trace yet.
   504  		return curCtx, false, nil
   505  	}
   506  	// We can advance this goroutine. Check some invariants.
   507  	reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MustNotHave}
   508  	if err := validateCtx(curCtx, reqs); err != nil {
   509  		return curCtx, false, err
   510  	}
   511  	state.status = go122.GoRunning
   512  	state.seq = seq
   513  	newCtx := curCtx
   514  	newCtx.G = gid
   515  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   516  	return newCtx, true, nil
   517  }
   518  
   519  func (o *ordering) advanceGoUnblock(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   520  	// N.B. These both reference the goroutine to unblock, not the current goroutine.
   521  	gid := GoID(ev.args[0])
   522  	seq := makeSeq(gen, ev.args[1])
   523  	state, ok := o.gStates[gid]
   524  	if !ok || state.status != go122.GoWaiting || !seq.succeeds(state.seq) {
   525  		// We can't make an inference as to whether this is bad. We could just be seeing
   526  		// a GoUnblock on a different M before the goroutine was created and blocked itself,
   527  		// before it had its state emitted, or before we got to the right point in the trace yet.
   528  		return curCtx, false, nil
   529  	}
   530  	state.status = go122.GoRunnable
   531  	state.seq = seq
   532  	// N.B. No context to validate. Basically anything can unblock
   533  	// a goroutine (e.g. sysmon).
   534  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   535  	return curCtx, true, nil
   536  }
   537  
   538  func (o *ordering) advanceGoSwitch(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   539  	// GoSwitch and GoSwitchDestroy represent a trio of events:
   540  	// - Unblock of the goroutine to switch to.
   541  	// - Block or destroy of the current goroutine.
   542  	// - Start executing the next goroutine.
   543  	//
   544  	// Because it acts like a GoStart for the next goroutine, we can
   545  	// only advance it if the sequence numbers line up.
   546  	//
   547  	// The current goroutine on the thread must be actively running.
   548  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   549  		return curCtx, false, err
   550  	}
   551  	curGState, ok := o.gStates[curCtx.G]
   552  	if !ok {
   553  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
   554  	}
   555  	if curGState.status != go122.GoRunning {
   556  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
   557  	}
   558  	nextg := GoID(ev.args[0])
   559  	seq := makeSeq(gen, ev.args[1]) // seq is for nextg, not curCtx.G.
   560  	nextGState, ok := o.gStates[nextg]
   561  	if !ok || nextGState.status != go122.GoWaiting || !seq.succeeds(nextGState.seq) {
   562  		// We can't make an inference as to whether this is bad. We could just be seeing
   563  		// a GoSwitch on a different M before the goroutine was created, before it had its
   564  		// state emitted, or before we got to the right point in the trace yet.
   565  		return curCtx, false, nil
   566  	}
   567  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   568  
   569  	// Update the state of the executing goroutine and emit an event for it
   570  	// (GoSwitch and GoSwitchDestroy will be interpreted as GoUnblock events
   571  	// for nextg).
   572  	switch ev.typ {
   573  	case go122.EvGoSwitch:
   574  		// Goroutine blocked. It's waiting now and not running on this M.
   575  		curGState.status = go122.GoWaiting
   576  
   577  		// Emit a GoBlock event.
   578  		// TODO(mknyszek): Emit a reason.
   579  		o.queue.push(makeEvent(evt, curCtx, go122.EvGoBlock, ev.time, 0 /* no reason */, 0 /* no stack */))
   580  	case go122.EvGoSwitchDestroy:
   581  		// This goroutine is exiting itself.
   582  		delete(o.gStates, curCtx.G)
   583  
   584  		// Emit a GoDestroy event.
   585  		o.queue.push(makeEvent(evt, curCtx, go122.EvGoDestroy, ev.time))
   586  	}
   587  	// Update the state of the next goroutine.
   588  	nextGState.status = go122.GoRunning
   589  	nextGState.seq = seq
   590  	newCtx := curCtx
   591  	newCtx.G = nextg
   592  
   593  	// Queue an event for the next goroutine starting to run.
   594  	startCtx := curCtx
   595  	startCtx.G = NoGoroutine
   596  	o.queue.push(makeEvent(evt, startCtx, go122.EvGoStart, ev.time, uint64(nextg), ev.args[1]))
   597  	return newCtx, true, nil
   598  }
   599  
   600  func (o *ordering) advanceGoSyscallBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   601  	// Entering a syscall requires an active running goroutine with a
   602  	// proc on some thread. It is always advancable.
   603  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   604  		return curCtx, false, err
   605  	}
   606  	state, ok := o.gStates[curCtx.G]
   607  	if !ok {
   608  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
   609  	}
   610  	if state.status != go122.GoRunning {
   611  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
   612  	}
   613  	// Goroutine entered a syscall. It's still running on this P and M.
   614  	state.status = go122.GoSyscall
   615  	pState, ok := o.pStates[curCtx.P]
   616  	if !ok {
   617  		return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(ev.typ))
   618  	}
   619  	pState.status = go122.ProcSyscall
   620  	// Validate the P sequence number on the event and advance it.
   621  	//
   622  	// We have a P sequence number for what is supposed to be a goroutine event
   623  	// so that we can correctly model P stealing. Without this sequence number here,
   624  	// the syscall from which a ProcSteal event is stealing can be ambiguous in the
   625  	// face of broken timestamps. See the go122-syscall-steal-proc-ambiguous test for
   626  	// more details.
   627  	//
   628  	// Note that because this sequence number only exists as a tool for disambiguation,
   629  	// we can enforce that we have the right sequence number at this point; we don't need
   630  	// to back off and see if any other events will advance. This is a running P.
   631  	pSeq := makeSeq(gen, ev.args[0])
   632  	if !pSeq.succeeds(pState.seq) {
   633  		return curCtx, false, fmt.Errorf("failed to advance %s: can't make sequence: %s -> %s", go122.EventString(ev.typ), pState.seq, pSeq)
   634  	}
   635  	pState.seq = pSeq
   636  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   637  	return curCtx, true, nil
   638  }
   639  
   640  func (o *ordering) advanceGoSyscallEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   641  	// This event is always advance-able because it happens on the same
   642  	// thread that EvGoSyscallStart happened, and the goroutine can't leave
   643  	// that thread until its done.
   644  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   645  		return curCtx, false, err
   646  	}
   647  	state, ok := o.gStates[curCtx.G]
   648  	if !ok {
   649  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
   650  	}
   651  	if state.status != go122.GoSyscall {
   652  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
   653  	}
   654  	state.status = go122.GoRunning
   655  
   656  	// Transfer the P back to running from syscall.
   657  	pState, ok := o.pStates[curCtx.P]
   658  	if !ok {
   659  		return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(ev.typ))
   660  	}
   661  	if pState.status != go122.ProcSyscall {
   662  		return curCtx, false, fmt.Errorf("expected proc %d in state %v, but got %v instead", curCtx.P, go122.ProcSyscall, pState.status)
   663  	}
   664  	pState.status = go122.ProcRunning
   665  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   666  	return curCtx, true, nil
   667  }
   668  
   669  func (o *ordering) advanceGoSyscallEndBlocked(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   670  	// This event becomes advanceable when its P is not in a syscall state
   671  	// (lack of a P altogether is also acceptable for advancing).
   672  	// The transfer out of ProcSyscall can happen either voluntarily via
   673  	// ProcStop or involuntarily via ProcSteal. We may also acquire a new P
   674  	// before we get here (after the transfer out) but that's OK: that new
   675  	// P won't be in the ProcSyscall state anymore.
   676  	//
   677  	// Basically: while we have a preemptible P, don't advance, because we
   678  	// *know* from the event that we're going to lose it at some point during
   679  	// the syscall. We shouldn't advance until that happens.
   680  	if curCtx.P != NoProc {
   681  		pState, ok := o.pStates[curCtx.P]
   682  		if !ok {
   683  			return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(ev.typ))
   684  		}
   685  		if pState.status == go122.ProcSyscall {
   686  			return curCtx, false, nil
   687  		}
   688  	}
   689  	// As mentioned above, we may have a P here if we ProcStart
   690  	// before this event.
   691  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil {
   692  		return curCtx, false, err
   693  	}
   694  	state, ok := o.gStates[curCtx.G]
   695  	if !ok {
   696  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
   697  	}
   698  	if state.status != go122.GoSyscall {
   699  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
   700  	}
   701  	newCtx := curCtx
   702  	newCtx.G = NoGoroutine
   703  	state.status = go122.GoRunnable
   704  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   705  	return newCtx, true, nil
   706  }
   707  
   708  func (o *ordering) advanceGoCreateSyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   709  	// This event indicates that a goroutine is effectively
   710  	// being created out of a cgo callback. Such a goroutine
   711  	// is 'created' in the syscall state.
   712  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustNotHave}); err != nil {
   713  		return curCtx, false, err
   714  	}
   715  	// This goroutine is effectively being created. Add a state for it.
   716  	newgid := GoID(ev.args[0])
   717  	if _, ok := o.gStates[newgid]; ok {
   718  		return curCtx, false, fmt.Errorf("tried to create goroutine (%v) in syscall that already exists", newgid)
   719  	}
   720  	o.gStates[newgid] = &gState{id: newgid, status: go122.GoSyscall, seq: makeSeq(gen, 0)}
   721  	// Goroutine is executing. Bind it to the context.
   722  	newCtx := curCtx
   723  	newCtx.G = newgid
   724  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   725  	return newCtx, true, nil
   726  }
   727  
   728  func (o *ordering) advanceGoDestroySyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   729  	// This event indicates that a goroutine created for a
   730  	// cgo callback is disappearing, either because the callback
   731  	// ending or the C thread that called it is being destroyed.
   732  	//
   733  	// Also, treat this as if we lost our P too.
   734  	// The thread ID may be reused by the platform and we'll get
   735  	// really confused if we try to steal the P is this is running
   736  	// with later. The new M with the same ID could even try to
   737  	// steal back this P from itself!
   738  	//
   739  	// The runtime is careful to make sure that any GoCreateSyscall
   740  	// event will enter the runtime emitting events for reacquiring a P.
   741  	//
   742  	// Note: we might have a P here. The P might not be released
   743  	// eagerly by the runtime, and it might get stolen back later
   744  	// (or never again, if the program is going to exit).
   745  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil {
   746  		return curCtx, false, err
   747  	}
   748  	// Check to make sure the goroutine exists in the right state.
   749  	state, ok := o.gStates[curCtx.G]
   750  	if !ok {
   751  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
   752  	}
   753  	if state.status != go122.GoSyscall {
   754  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %v", go122.EventString(ev.typ), GoSyscall)
   755  	}
   756  	// This goroutine is exiting itself.
   757  	delete(o.gStates, curCtx.G)
   758  	newCtx := curCtx
   759  	newCtx.G = NoGoroutine
   760  
   761  	// If we have a proc, then we're dissociating from it now. See the comment at the top of the case.
   762  	if curCtx.P != NoProc {
   763  		pState, ok := o.pStates[curCtx.P]
   764  		if !ok {
   765  			return curCtx, false, fmt.Errorf("found invalid proc %d during %s", curCtx.P, go122.EventString(ev.typ))
   766  		}
   767  		if pState.status != go122.ProcSyscall {
   768  			return curCtx, false, fmt.Errorf("proc %d in unexpected state %s during %s", curCtx.P, pState.status, go122.EventString(ev.typ))
   769  		}
   770  		// See the go122-create-syscall-reuse-thread-id test case for more details.
   771  		pState.status = go122.ProcSyscallAbandoned
   772  		newCtx.P = NoProc
   773  
   774  		// Queue an extra self-ProcSteal event.
   775  		extra := makeEvent(evt, curCtx, go122.EvProcSteal, ev.time, uint64(curCtx.P))
   776  		extra.base.extra(version.Go122)[0] = uint64(go122.ProcSyscall)
   777  		o.queue.push(extra)
   778  	}
   779  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   780  	return newCtx, true, nil
   781  }
   782  
   783  func (o *ordering) advanceUserTaskBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   784  	// Handle tasks. Tasks are interesting because:
   785  	// - There's no Begin event required to reference a task.
   786  	// - End for a particular task ID can appear multiple times.
   787  	// As a result, there's very little to validate. The only
   788  	// thing we have to be sure of is that a task didn't begin
   789  	// after it had already begun. Task IDs are allowed to be
   790  	// reused, so we don't care about a Begin after an End.
   791  	id := TaskID(ev.args[0])
   792  	if _, ok := o.activeTasks[id]; ok {
   793  		return curCtx, false, fmt.Errorf("task ID conflict: %d", id)
   794  	}
   795  	// Get the parent ID, but don't validate it. There's no guarantee
   796  	// we actually have information on whether it's active.
   797  	parentID := TaskID(ev.args[1])
   798  	if parentID == BackgroundTask {
   799  		// Note: a value of 0 here actually means no parent, *not* the
   800  		// background task. Automatic background task attachment only
   801  		// applies to regions.
   802  		parentID = NoTask
   803  		ev.args[1] = uint64(NoTask)
   804  	}
   805  
   806  	// Validate the name and record it. We'll need to pass it through to
   807  	// EvUserTaskEnd.
   808  	nameID := stringID(ev.args[2])
   809  	name, ok := evt.strings.get(nameID)
   810  	if !ok {
   811  		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
   812  	}
   813  	o.activeTasks[id] = taskState{name: name, parentID: parentID}
   814  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   815  		return curCtx, false, err
   816  	}
   817  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   818  	return curCtx, true, nil
   819  }
   820  
   821  func (o *ordering) advanceUserTaskEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   822  	id := TaskID(ev.args[0])
   823  	if ts, ok := o.activeTasks[id]; ok {
   824  		// Smuggle the task info. This may happen in a different generation,
   825  		// which may not have the name in its string table. Add it to the extra
   826  		// strings table so we can look it up later.
   827  		ev.extra(version.Go122)[0] = uint64(ts.parentID)
   828  		ev.extra(version.Go122)[1] = uint64(evt.addExtraString(ts.name))
   829  		delete(o.activeTasks, id)
   830  	} else {
   831  		// Explicitly clear the task info.
   832  		ev.extra(version.Go122)[0] = uint64(NoTask)
   833  		ev.extra(version.Go122)[1] = uint64(evt.addExtraString(""))
   834  	}
   835  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   836  		return curCtx, false, err
   837  	}
   838  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   839  	return curCtx, true, nil
   840  }
   841  
   842  func (o *ordering) advanceUserRegionBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   843  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   844  		return curCtx, false, err
   845  	}
   846  	tid := TaskID(ev.args[0])
   847  	nameID := stringID(ev.args[1])
   848  	name, ok := evt.strings.get(nameID)
   849  	if !ok {
   850  		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
   851  	}
   852  	gState, ok := o.gStates[curCtx.G]
   853  	if !ok {
   854  		return curCtx, false, fmt.Errorf("encountered EvUserRegionBegin without known state for current goroutine %d", curCtx.G)
   855  	}
   856  	if err := gState.beginRegion(userRegion{tid, name}); err != nil {
   857  		return curCtx, false, err
   858  	}
   859  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   860  	return curCtx, true, nil
   861  }
   862  
   863  func (o *ordering) advanceUserRegionEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   864  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   865  		return curCtx, false, err
   866  	}
   867  	tid := TaskID(ev.args[0])
   868  	nameID := stringID(ev.args[1])
   869  	name, ok := evt.strings.get(nameID)
   870  	if !ok {
   871  		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
   872  	}
   873  	gState, ok := o.gStates[curCtx.G]
   874  	if !ok {
   875  		return curCtx, false, fmt.Errorf("encountered EvUserRegionEnd without known state for current goroutine %d", curCtx.G)
   876  	}
   877  	if err := gState.endRegion(userRegion{tid, name}); err != nil {
   878  		return curCtx, false, err
   879  	}
   880  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   881  	return curCtx, true, nil
   882  }
   883  
   884  // Handle the GC mark phase.
   885  //
   886  // We have sequence numbers for both start and end because they
   887  // can happen on completely different threads. We want an explicit
   888  // partial order edge between start and end here, otherwise we're
   889  // relying entirely on timestamps to make sure we don't advance a
   890  // GCEnd for a _different_ GC cycle if timestamps are wildly broken.
   891  func (o *ordering) advanceGCActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   892  	seq := ev.args[0]
   893  	if gen == o.initialGen {
   894  		if o.gcState != gcUndetermined {
   895  			return curCtx, false, fmt.Errorf("GCActive in the first generation isn't first GC event")
   896  		}
   897  		o.gcSeq = seq
   898  		o.gcState = gcRunning
   899  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   900  		return curCtx, true, nil
   901  	}
   902  	if seq != o.gcSeq+1 {
   903  		// This is not the right GC cycle.
   904  		return curCtx, false, nil
   905  	}
   906  	if o.gcState != gcRunning {
   907  		return curCtx, false, fmt.Errorf("encountered GCActive while GC was not in progress")
   908  	}
   909  	o.gcSeq = seq
   910  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   911  		return curCtx, false, err
   912  	}
   913  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   914  	return curCtx, true, nil
   915  }
   916  
   917  func (o *ordering) advanceGCBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   918  	seq := ev.args[0]
   919  	if o.gcState == gcUndetermined {
   920  		o.gcSeq = seq
   921  		o.gcState = gcRunning
   922  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   923  		return curCtx, true, nil
   924  	}
   925  	if seq != o.gcSeq+1 {
   926  		// This is not the right GC cycle.
   927  		return curCtx, false, nil
   928  	}
   929  	if o.gcState == gcRunning {
   930  		return curCtx, false, fmt.Errorf("encountered GCBegin while GC was already in progress")
   931  	}
   932  	o.gcSeq = seq
   933  	o.gcState = gcRunning
   934  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   935  		return curCtx, false, err
   936  	}
   937  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   938  	return curCtx, true, nil
   939  }
   940  
   941  func (o *ordering) advanceGCEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   942  	seq := ev.args[0]
   943  	if seq != o.gcSeq+1 {
   944  		// This is not the right GC cycle.
   945  		return curCtx, false, nil
   946  	}
   947  	if o.gcState == gcNotRunning {
   948  		return curCtx, false, fmt.Errorf("encountered GCEnd when GC was not in progress")
   949  	}
   950  	if o.gcState == gcUndetermined {
   951  		return curCtx, false, fmt.Errorf("encountered GCEnd when GC was in an undetermined state")
   952  	}
   953  	o.gcSeq = seq
   954  	o.gcState = gcNotRunning
   955  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   956  		return curCtx, false, err
   957  	}
   958  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   959  	return curCtx, true, nil
   960  }
   961  
   962  func (o *ordering) advanceAnnotation(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   963  	// Handle simple instantaneous events that require a G.
   964  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   965  		return curCtx, false, err
   966  	}
   967  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   968  	return curCtx, true, nil
   969  }
   970  
   971  func (o *ordering) advanceHeapMetric(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   972  	// Handle allocation metrics, which don't require a G.
   973  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
   974  		return curCtx, false, err
   975  	}
   976  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   977  	return curCtx, true, nil
   978  }
   979  
   980  func (o *ordering) advanceGCSweepBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   981  	// Handle sweep, which is bound to a P and doesn't require a G.
   982  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
   983  		return curCtx, false, err
   984  	}
   985  	if err := o.pStates[curCtx.P].beginRange(makeRangeType(ev.typ, 0)); err != nil {
   986  		return curCtx, false, err
   987  	}
   988  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   989  	return curCtx, true, nil
   990  }
   991  
   992  func (o *ordering) advanceGCSweepActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   993  	pid := ProcID(ev.args[0])
   994  	// N.B. In practice Ps can't block while they're sweeping, so this can only
   995  	// ever reference curCtx.P. However, be lenient about this like we are with
   996  	// GCMarkAssistActive; there's no reason the runtime couldn't change to block
   997  	// in the middle of a sweep.
   998  	pState, ok := o.pStates[pid]
   999  	if !ok {
  1000  		return curCtx, false, fmt.Errorf("encountered GCSweepActive for unknown proc %d", pid)
  1001  	}
  1002  	if err := pState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil {
  1003  		return curCtx, false, err
  1004  	}
  1005  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1006  	return curCtx, true, nil
  1007  }
  1008  
  1009  func (o *ordering) advanceGCSweepEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1010  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
  1011  		return curCtx, false, err
  1012  	}
  1013  	_, err := o.pStates[curCtx.P].endRange(ev.typ)
  1014  	if err != nil {
  1015  		return curCtx, false, err
  1016  	}
  1017  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1018  	return curCtx, true, nil
  1019  }
  1020  
  1021  func (o *ordering) advanceGoRangeBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1022  	// Handle special goroutine-bound event ranges.
  1023  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
  1024  		return curCtx, false, err
  1025  	}
  1026  	desc := stringID(0)
  1027  	if ev.typ == go122.EvSTWBegin {
  1028  		desc = stringID(ev.args[0])
  1029  	}
  1030  	gState, ok := o.gStates[curCtx.G]
  1031  	if !ok {
  1032  		return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G)
  1033  	}
  1034  	if err := gState.beginRange(makeRangeType(ev.typ, desc)); err != nil {
  1035  		return curCtx, false, err
  1036  	}
  1037  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1038  	return curCtx, true, nil
  1039  }
  1040  
  1041  func (o *ordering) advanceGoRangeActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1042  	gid := GoID(ev.args[0])
  1043  	// N.B. Like GoStatus, this can happen at any time, because it can
  1044  	// reference a non-running goroutine. Don't check anything about the
  1045  	// current scheduler context.
  1046  	gState, ok := o.gStates[gid]
  1047  	if !ok {
  1048  		return curCtx, false, fmt.Errorf("uninitialized goroutine %d found during %s", gid, go122.EventString(ev.typ))
  1049  	}
  1050  	if err := gState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil {
  1051  		return curCtx, false, err
  1052  	}
  1053  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1054  	return curCtx, true, nil
  1055  }
  1056  
  1057  func (o *ordering) advanceGoRangeEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1058  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
  1059  		return curCtx, false, err
  1060  	}
  1061  	gState, ok := o.gStates[curCtx.G]
  1062  	if !ok {
  1063  		return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G)
  1064  	}
  1065  	desc, err := gState.endRange(ev.typ)
  1066  	if err != nil {
  1067  		return curCtx, false, err
  1068  	}
  1069  	if ev.typ == go122.EvSTWEnd {
  1070  		// Smuggle the kind into the event.
  1071  		// Don't use ev.extra here so we have symmetry with STWBegin.
  1072  		ev.args[0] = uint64(desc)
  1073  	}
  1074  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1075  	return curCtx, true, nil
  1076  }
  1077  
  1078  func (o *ordering) advanceAllocFree(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1079  	// Handle simple instantaneous events that may or may not have a P.
  1080  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MayHave}); err != nil {
  1081  		return curCtx, false, err
  1082  	}
  1083  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1084  	return curCtx, true, nil
  1085  }
  1086  
  1087  // Next returns the next event in the ordering.
  1088  func (o *ordering) Next() (Event, bool) {
  1089  	return o.queue.pop()
  1090  }
  1091  
  1092  // schedCtx represents the scheduling resources associated with an event.
  1093  type schedCtx struct {
  1094  	G GoID
  1095  	P ProcID
  1096  	M ThreadID
  1097  }
  1098  
  1099  // validateCtx ensures that ctx conforms to some reqs, returning an error if
  1100  // it doesn't.
  1101  func validateCtx(ctx schedCtx, reqs event.SchedReqs) error {
  1102  	// Check thread requirements.
  1103  	if reqs.Thread == event.MustHave && ctx.M == NoThread {
  1104  		return fmt.Errorf("expected a thread but didn't have one")
  1105  	} else if reqs.Thread == event.MustNotHave && ctx.M != NoThread {
  1106  		return fmt.Errorf("expected no thread but had one")
  1107  	}
  1108  
  1109  	// Check proc requirements.
  1110  	if reqs.Proc == event.MustHave && ctx.P == NoProc {
  1111  		return fmt.Errorf("expected a proc but didn't have one")
  1112  	} else if reqs.Proc == event.MustNotHave && ctx.P != NoProc {
  1113  		return fmt.Errorf("expected no proc but had one")
  1114  	}
  1115  
  1116  	// Check goroutine requirements.
  1117  	if reqs.Goroutine == event.MustHave && ctx.G == NoGoroutine {
  1118  		return fmt.Errorf("expected a goroutine but didn't have one")
  1119  	} else if reqs.Goroutine == event.MustNotHave && ctx.G != NoGoroutine {
  1120  		return fmt.Errorf("expected no goroutine but had one")
  1121  	}
  1122  	return nil
  1123  }
  1124  
  1125  // gcState is a trinary variable for the current state of the GC.
  1126  //
  1127  // The third state besides "enabled" and "disabled" is "undetermined."
  1128  type gcState uint8
  1129  
  1130  const (
  1131  	gcUndetermined gcState = iota
  1132  	gcNotRunning
  1133  	gcRunning
  1134  )
  1135  
  1136  // String returns a human-readable string for the GC state.
  1137  func (s gcState) String() string {
  1138  	switch s {
  1139  	case gcUndetermined:
  1140  		return "Undetermined"
  1141  	case gcNotRunning:
  1142  		return "NotRunning"
  1143  	case gcRunning:
  1144  		return "Running"
  1145  	}
  1146  	return "Bad"
  1147  }
  1148  
  1149  // userRegion represents a unique user region when attached to some gState.
  1150  type userRegion struct {
  1151  	// name must be a resolved string because the string ID for the same
  1152  	// string may change across generations, but we care about checking
  1153  	// the value itself.
  1154  	taskID TaskID
  1155  	name   string
  1156  }
  1157  
  1158  // rangeType is a way to classify special ranges of time.
  1159  //
  1160  // These typically correspond 1:1 with "Begin" events, but
  1161  // they may have an optional subtype that describes the range
  1162  // in more detail.
  1163  type rangeType struct {
  1164  	typ  event.Type // "Begin" event.
  1165  	desc stringID   // Optional subtype.
  1166  }
  1167  
  1168  // makeRangeType constructs a new rangeType.
  1169  func makeRangeType(typ event.Type, desc stringID) rangeType {
  1170  	if styp := go122.Specs()[typ].StartEv; styp != go122.EvNone {
  1171  		typ = styp
  1172  	}
  1173  	return rangeType{typ, desc}
  1174  }
  1175  
  1176  // gState is the state of a goroutine at a point in the trace.
  1177  type gState struct {
  1178  	id     GoID
  1179  	status go122.GoStatus
  1180  	seq    seqCounter
  1181  
  1182  	// regions are the active user regions for this goroutine.
  1183  	regions []userRegion
  1184  
  1185  	// rangeState is the state of special time ranges bound to this goroutine.
  1186  	rangeState
  1187  }
  1188  
  1189  // beginRegion starts a user region on the goroutine.
  1190  func (s *gState) beginRegion(r userRegion) error {
  1191  	s.regions = append(s.regions, r)
  1192  	return nil
  1193  }
  1194  
  1195  // endRegion ends a user region on the goroutine.
  1196  func (s *gState) endRegion(r userRegion) error {
  1197  	if len(s.regions) == 0 {
  1198  		// We do not know about regions that began before tracing started.
  1199  		return nil
  1200  	}
  1201  	if next := s.regions[len(s.regions)-1]; next != r {
  1202  		return fmt.Errorf("misuse of region in goroutine %v: region end %v when the inner-most active region start event is %v", s.id, r, next)
  1203  	}
  1204  	s.regions = s.regions[:len(s.regions)-1]
  1205  	return nil
  1206  }
  1207  
  1208  // pState is the state of a proc at a point in the trace.
  1209  type pState struct {
  1210  	id     ProcID
  1211  	status go122.ProcStatus
  1212  	seq    seqCounter
  1213  
  1214  	// rangeState is the state of special time ranges bound to this proc.
  1215  	rangeState
  1216  }
  1217  
  1218  // mState is the state of a thread at a point in the trace.
  1219  type mState struct {
  1220  	g GoID   // Goroutine bound to this M. (The goroutine's state is Executing.)
  1221  	p ProcID // Proc bound to this M. (The proc's state is Executing.)
  1222  }
  1223  
  1224  // rangeState represents the state of special time ranges.
  1225  type rangeState struct {
  1226  	// inFlight contains the rangeTypes of any ranges bound to a resource.
  1227  	inFlight []rangeType
  1228  }
  1229  
  1230  // beginRange begins a special range in time on the goroutine.
  1231  //
  1232  // Returns an error if the range is already in progress.
  1233  func (s *rangeState) beginRange(typ rangeType) error {
  1234  	if s.hasRange(typ) {
  1235  		return fmt.Errorf("discovered event already in-flight for when starting event %v", go122.Specs()[typ.typ].Name)
  1236  	}
  1237  	s.inFlight = append(s.inFlight, typ)
  1238  	return nil
  1239  }
  1240  
  1241  // activeRange marks special range in time on the goroutine as active in the
  1242  // initial generation, or confirms that it is indeed active in later generations.
  1243  func (s *rangeState) activeRange(typ rangeType, isInitialGen bool) error {
  1244  	if isInitialGen {
  1245  		if s.hasRange(typ) {
  1246  			return fmt.Errorf("found named active range already in first gen: %v", typ)
  1247  		}
  1248  		s.inFlight = append(s.inFlight, typ)
  1249  	} else if !s.hasRange(typ) {
  1250  		return fmt.Errorf("resource is missing active range: %v %v", go122.Specs()[typ.typ].Name, s.inFlight)
  1251  	}
  1252  	return nil
  1253  }
  1254  
  1255  // hasRange returns true if a special time range on the goroutine as in progress.
  1256  func (s *rangeState) hasRange(typ rangeType) bool {
  1257  	for _, ftyp := range s.inFlight {
  1258  		if ftyp == typ {
  1259  			return true
  1260  		}
  1261  	}
  1262  	return false
  1263  }
  1264  
  1265  // endRange ends a special range in time on the goroutine.
  1266  //
  1267  // This must line up with the start event type  of the range the goroutine is currently in.
  1268  func (s *rangeState) endRange(typ event.Type) (stringID, error) {
  1269  	st := go122.Specs()[typ].StartEv
  1270  	idx := -1
  1271  	for i, r := range s.inFlight {
  1272  		if r.typ == st {
  1273  			idx = i
  1274  			break
  1275  		}
  1276  	}
  1277  	if idx < 0 {
  1278  		return 0, fmt.Errorf("tried to end event %v, but not in-flight", go122.Specs()[st].Name)
  1279  	}
  1280  	// Swap remove.
  1281  	desc := s.inFlight[idx].desc
  1282  	s.inFlight[idx], s.inFlight[len(s.inFlight)-1] = s.inFlight[len(s.inFlight)-1], s.inFlight[idx]
  1283  	s.inFlight = s.inFlight[:len(s.inFlight)-1]
  1284  	return desc, nil
  1285  }
  1286  
  1287  // seqCounter represents a global sequence counter for a resource.
  1288  type seqCounter struct {
  1289  	gen uint64 // The generation for the local sequence counter seq.
  1290  	seq uint64 // The sequence number local to the generation.
  1291  }
  1292  
  1293  // makeSeq creates a new seqCounter.
  1294  func makeSeq(gen, seq uint64) seqCounter {
  1295  	return seqCounter{gen: gen, seq: seq}
  1296  }
  1297  
  1298  // succeeds returns true if a is the immediate successor of b.
  1299  func (a seqCounter) succeeds(b seqCounter) bool {
  1300  	return a.gen == b.gen && a.seq == b.seq+1
  1301  }
  1302  
  1303  // String returns a debug string representation of the seqCounter.
  1304  func (c seqCounter) String() string {
  1305  	return fmt.Sprintf("%d (gen=%d)", c.seq, c.gen)
  1306  }
  1307  
  1308  func dumpOrdering(order *ordering) string {
  1309  	var sb strings.Builder
  1310  	for id, state := range order.gStates {
  1311  		fmt.Fprintf(&sb, "G %d [status=%s seq=%s]\n", id, state.status, state.seq)
  1312  	}
  1313  	fmt.Fprintln(&sb)
  1314  	for id, state := range order.pStates {
  1315  		fmt.Fprintf(&sb, "P %d [status=%s seq=%s]\n", id, state.status, state.seq)
  1316  	}
  1317  	fmt.Fprintln(&sb)
  1318  	for id, state := range order.mStates {
  1319  		fmt.Fprintf(&sb, "M %d [g=%d p=%d]\n", id, state.g, state.p)
  1320  	}
  1321  	fmt.Fprintln(&sb)
  1322  	fmt.Fprintf(&sb, "GC %d %s\n", order.gcSeq, order.gcState)
  1323  	return sb.String()
  1324  }
  1325  
  1326  // taskState represents an active task.
  1327  type taskState struct {
  1328  	// name is the type of the active task.
  1329  	name string
  1330  
  1331  	// parentID is the parent ID of the active task.
  1332  	parentID TaskID
  1333  }
  1334  
  1335  // queue implements a growable ring buffer with a queue API.
  1336  type queue[T any] struct {
  1337  	start, end int
  1338  	buf        []T
  1339  }
  1340  
  1341  // push adds a new event to the back of the queue.
  1342  func (q *queue[T]) push(value T) {
  1343  	if q.end-q.start == len(q.buf) {
  1344  		q.grow()
  1345  	}
  1346  	q.buf[q.end%len(q.buf)] = value
  1347  	q.end++
  1348  }
  1349  
  1350  // grow increases the size of the queue.
  1351  func (q *queue[T]) grow() {
  1352  	if len(q.buf) == 0 {
  1353  		q.buf = make([]T, 2)
  1354  		return
  1355  	}
  1356  
  1357  	// Create new buf and copy data over.
  1358  	newBuf := make([]T, len(q.buf)*2)
  1359  	pivot := q.start % len(q.buf)
  1360  	first, last := q.buf[pivot:], q.buf[:pivot]
  1361  	copy(newBuf[:len(first)], first)
  1362  	copy(newBuf[len(first):], last)
  1363  
  1364  	// Update the queue state.
  1365  	q.start = 0
  1366  	q.end = len(q.buf)
  1367  	q.buf = newBuf
  1368  }
  1369  
  1370  // pop removes an event from the front of the queue. If the
  1371  // queue is empty, it returns an EventBad event.
  1372  func (q *queue[T]) pop() (T, bool) {
  1373  	if q.end-q.start == 0 {
  1374  		return *new(T), false
  1375  	}
  1376  	elem := &q.buf[q.start%len(q.buf)]
  1377  	value := *elem
  1378  	*elem = *new(T) // Clear the entry before returning, so we don't hold onto old tables.
  1379  	q.start++
  1380  	return value, true
  1381  }
  1382  
  1383  // makeEvent creates an Event from the provided information.
  1384  //
  1385  // It's just a convenience function; it's always OK to construct
  1386  // an Event manually if this isn't quite the right way to express
  1387  // the contents of the event.
  1388  func makeEvent(table *evTable, ctx schedCtx, typ event.Type, time Time, args ...uint64) Event {
  1389  	ev := Event{
  1390  		table: table,
  1391  		ctx:   ctx,
  1392  		base: baseEvent{
  1393  			typ:  typ,
  1394  			time: time,
  1395  		},
  1396  	}
  1397  	copy(ev.base.args[:], args)
  1398  	return ev
  1399  }
  1400  

View as plain text