...

Source file src/cmd/vendor/golang.org/x/telemetry/internal/crashmonitor/monitor.go

Documentation: cmd/vendor/golang.org/x/telemetry/internal/crashmonitor

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package crashmonitor
     6  
     7  // This file defines a monitor that reports arbitrary Go runtime
     8  // crashes to telemetry.
     9  
    10  import (
    11  	"bytes"
    12  	"fmt"
    13  	"io"
    14  	"log"
    15  	"os"
    16  	"reflect"
    17  	"runtime/debug"
    18  	"strconv"
    19  	"strings"
    20  
    21  	"golang.org/x/telemetry/internal/counter"
    22  )
    23  
    24  // Supported reports whether the runtime supports [runtime/debug.SetCrashOutput].
    25  //
    26  // TODO(adonovan): eliminate once go1.23+ is assured.
    27  func Supported() bool { return setCrashOutput != nil }
    28  
    29  var setCrashOutput func(*os.File) error // = runtime/debug.SetCrashOutput on go1.23+
    30  
    31  // Parent sets up the parent side of the crashmonitor. It requires
    32  // exclusive use of a writable pipe connected to the child process's stdin.
    33  func Parent(pipe *os.File) {
    34  	writeSentinel(pipe)
    35  	// Ensure that we get pc=0x%x values in the traceback.
    36  	debug.SetTraceback("system")
    37  	setCrashOutput(pipe)
    38  }
    39  
    40  // Child runs the part of the crashmonitor that runs in the child process.
    41  // It expects its stdin to be connected via a pipe to the parent which has
    42  // run Parent.
    43  func Child() {
    44  	// Wait for parent process's dying gasp.
    45  	// If the parent dies for any reason this read will return.
    46  	data, err := io.ReadAll(os.Stdin)
    47  	if err != nil {
    48  		log.Fatalf("failed to read from input pipe: %v", err)
    49  	}
    50  
    51  	// If the only line is the sentinel, it wasn't a crash.
    52  	if bytes.Count(data, []byte("\n")) < 2 {
    53  		childExitHook()
    54  		os.Exit(0) // parent exited without crash report
    55  	}
    56  
    57  	log.Printf("parent reported crash:\n%s", data)
    58  
    59  	// Parse the stack out of the crash report
    60  	// and record a telemetry count for it.
    61  	name, err := telemetryCounterName(data)
    62  	if err != nil {
    63  		// Keep count of how often this happens
    64  		// so that we can investigate if necessary.
    65  		incrementCounter("crash/malformed")
    66  
    67  		// Something went wrong.
    68  		// Save the crash securely in the file system.
    69  		f, err := os.CreateTemp(os.TempDir(), "*.crash")
    70  		if err != nil {
    71  			log.Fatal(err)
    72  		}
    73  		if _, err := f.Write(data); err != nil {
    74  			log.Fatal(err)
    75  		}
    76  		if err := f.Close(); err != nil {
    77  			log.Fatal(err)
    78  		}
    79  		log.Printf("failed to report crash to telemetry: %v", err)
    80  		log.Fatalf("crash report saved at %s", f.Name())
    81  	}
    82  
    83  	incrementCounter(name)
    84  
    85  	childExitHook()
    86  	log.Fatalf("telemetry crash recorded")
    87  }
    88  
    89  // (stubbed by test)
    90  var (
    91  	incrementCounter = func(name string) { counter.New(name).Inc() }
    92  	childExitHook    = func() {}
    93  )
    94  
    95  // The sentinel function returns its address. The difference between
    96  // this value as observed by calls in two different processes of the
    97  // same executable tells us the relative offset of their text segments.
    98  //
    99  // It would be nice if SetCrashOutput took care of this as it's fiddly
   100  // and likely to confuse every user at first.
   101  func sentinel() uint64 {
   102  	return uint64(reflect.ValueOf(sentinel).Pointer())
   103  }
   104  
   105  func writeSentinel(out io.Writer) {
   106  	fmt.Fprintf(out, "sentinel %x\n", sentinel())
   107  }
   108  
   109  // telemetryCounterName parses a crash report produced by the Go
   110  // runtime, extracts the stack of the first runnable goroutine,
   111  // converts each line into telemetry form ("symbol:relative-line"),
   112  // and returns this as the name of a counter.
   113  func telemetryCounterName(crash []byte) (string, error) {
   114  	pcs, err := parseStackPCs(string(crash))
   115  	if err != nil {
   116  		return "", err
   117  	}
   118  
   119  	// Limit the number of frames we request.
   120  	pcs = pcs[:min(len(pcs), 16)]
   121  
   122  	if len(pcs) == 0 {
   123  		// This can occur if all goroutines are idle, as when
   124  		// caught in a deadlock, or killed by an async signal
   125  		// while blocked.
   126  		//
   127  		// TODO(adonovan): consider how to report such
   128  		// situations. Reporting a goroutine in [sleep] or
   129  		// [select] state could be quite confusing without
   130  		// further information about the nature of the crash,
   131  		// as the problem is not local to the code location.
   132  		//
   133  		// For now, we keep count of this situation so that we
   134  		// can access whether it needs a more involved solution.
   135  		return "crash/no-running-goroutine", nil
   136  	}
   137  
   138  	// This string appears at the start of all
   139  	// crashmonitor-generated counter names.
   140  	//
   141  	// It is tempting to expose this as a parameter of Start, but
   142  	// it is not without risk. What value should most programs
   143  	// provide? There's no point giving the name of the executable
   144  	// as this is already recorded by telemetry. What if the
   145  	// application runs in multiple modes? Then it might be useful
   146  	// to record the mode. The problem is that an application with
   147  	// multiple modes probably doesn't know its mode by line 1 of
   148  	// main.main: it might require flag or argument parsing, or
   149  	// even validation of an environment variable, and we really
   150  	// want to steer users aware from any logic before Start. The
   151  	// flags and arguments will be wrong in the child process, and
   152  	// every extra conditional branch creates a risk that the
   153  	// recursively executed child program will behave not like the
   154  	// monitor but like the application. If the child process
   155  	// exits before calling Start, then the parent application
   156  	// will not have a monitor, and its crash reports will be
   157  	// discarded (written in to a pipe that is never read).
   158  	//
   159  	// So for now, we use this constant string.
   160  	const prefix = "crash/crash"
   161  	return counter.EncodeStack(pcs, prefix), nil
   162  }
   163  
   164  // parseStackPCs parses the parent process's program counters for the
   165  // first running goroutine out of a GOTRACEBACK=system traceback,
   166  // adjusting them so that they are valid for the child process's text
   167  // segment.
   168  //
   169  // This function returns only program counter values, ensuring that
   170  // there is no possibility of strings from the crash report (which may
   171  // contain PII) leaking into the telemetry system.
   172  func parseStackPCs(crash string) ([]uintptr, error) {
   173  	// getPC parses the PC out of a line of the form:
   174  	//     \tFILE:LINE +0xRELPC sp=... fp=... pc=...
   175  	getPC := func(line string) (uint64, error) {
   176  		_, pcstr, ok := strings.Cut(line, " pc=") // e.g. pc=0x%x
   177  		if !ok {
   178  			return 0, fmt.Errorf("no pc= for stack frame: %s", line)
   179  		}
   180  		return strconv.ParseUint(pcstr, 0, 64) // 0 => allow 0x prefix
   181  	}
   182  
   183  	var (
   184  		pcs            []uintptr
   185  		parentSentinel uint64
   186  		childSentinel  = sentinel()
   187  		on             = false // are we in the first running goroutine?
   188  		lines          = strings.Split(crash, "\n")
   189  	)
   190  	for i := 0; i < len(lines); i++ {
   191  		line := lines[i]
   192  
   193  		// Read sentinel value.
   194  		if parentSentinel == 0 && strings.HasPrefix(line, "sentinel ") {
   195  			_, err := fmt.Sscanf(line, "sentinel %x", &parentSentinel)
   196  			if err != nil {
   197  				return nil, fmt.Errorf("can't read sentinel line")
   198  			}
   199  			continue
   200  		}
   201  
   202  		// Search for "goroutine GID [STATUS]"
   203  		if !on {
   204  			if strings.HasPrefix(line, "goroutine ") &&
   205  				strings.Contains(line, " [running]:") {
   206  				on = true
   207  
   208  				if parentSentinel == 0 {
   209  					return nil, fmt.Errorf("no sentinel value in crash report")
   210  				}
   211  			}
   212  			continue
   213  		}
   214  
   215  		// A blank line marks end of a goroutine stack.
   216  		if line == "" {
   217  			break
   218  		}
   219  
   220  		// Skip the final "created by SYMBOL in goroutine GID" part.
   221  		if strings.HasPrefix(line, "created by ") {
   222  			break
   223  		}
   224  
   225  		// Expect a pair of lines:
   226  		//   SYMBOL(ARGS)
   227  		//   \tFILE:LINE +0xRELPC sp=0x%x fp=0x%x pc=0x%x
   228  		// Note: SYMBOL may contain parens "pkg.(*T).method"
   229  		// The RELPC is sometimes missing.
   230  
   231  		// Skip the symbol(args) line.
   232  		i++
   233  		if i == len(lines) {
   234  			break
   235  		}
   236  		line = lines[i]
   237  
   238  		// Parse the PC, and correct for the parent and child's
   239  		// different mappings of the text section.
   240  		pc, err := getPC(line)
   241  		if err != nil {
   242  			// Inlined frame, perhaps; skip it.
   243  			continue
   244  		}
   245  		pcs = append(pcs, uintptr(pc-parentSentinel+childSentinel))
   246  	}
   247  	return pcs, nil
   248  }
   249  
   250  func min(x, y int) int {
   251  	if x < y {
   252  		return x
   253  	} else {
   254  		return y
   255  	}
   256  }
   257  

View as plain text