1 // Copyright 2024 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package crashmonitor 6 7 // This file defines a monitor that reports arbitrary Go runtime 8 // crashes to telemetry. 9 10 import ( 11 "bytes" 12 "fmt" 13 "io" 14 "log" 15 "os" 16 "reflect" 17 "runtime/debug" 18 "strconv" 19 "strings" 20 21 "golang.org/x/telemetry/internal/counter" 22 ) 23 24 // Supported reports whether the runtime supports [runtime/debug.SetCrashOutput]. 25 // 26 // TODO(adonovan): eliminate once go1.23+ is assured. 27 func Supported() bool { return setCrashOutput != nil } 28 29 var setCrashOutput func(*os.File) error // = runtime/debug.SetCrashOutput on go1.23+ 30 31 // Parent sets up the parent side of the crashmonitor. It requires 32 // exclusive use of a writable pipe connected to the child process's stdin. 33 func Parent(pipe *os.File) { 34 writeSentinel(pipe) 35 // Ensure that we get pc=0x%x values in the traceback. 36 debug.SetTraceback("system") 37 setCrashOutput(pipe) 38 } 39 40 // Child runs the part of the crashmonitor that runs in the child process. 41 // It expects its stdin to be connected via a pipe to the parent which has 42 // run Parent. 43 func Child() { 44 // Wait for parent process's dying gasp. 45 // If the parent dies for any reason this read will return. 46 data, err := io.ReadAll(os.Stdin) 47 if err != nil { 48 log.Fatalf("failed to read from input pipe: %v", err) 49 } 50 51 // If the only line is the sentinel, it wasn't a crash. 52 if bytes.Count(data, []byte("\n")) < 2 { 53 childExitHook() 54 os.Exit(0) // parent exited without crash report 55 } 56 57 log.Printf("parent reported crash:\n%s", data) 58 59 // Parse the stack out of the crash report 60 // and record a telemetry count for it. 61 name, err := telemetryCounterName(data) 62 if err != nil { 63 // Keep count of how often this happens 64 // so that we can investigate if necessary. 65 incrementCounter("crash/malformed") 66 67 // Something went wrong. 68 // Save the crash securely in the file system. 69 f, err := os.CreateTemp(os.TempDir(), "*.crash") 70 if err != nil { 71 log.Fatal(err) 72 } 73 if _, err := f.Write(data); err != nil { 74 log.Fatal(err) 75 } 76 if err := f.Close(); err != nil { 77 log.Fatal(err) 78 } 79 log.Printf("failed to report crash to telemetry: %v", err) 80 log.Fatalf("crash report saved at %s", f.Name()) 81 } 82 83 incrementCounter(name) 84 85 childExitHook() 86 log.Fatalf("telemetry crash recorded") 87 } 88 89 // (stubbed by test) 90 var ( 91 incrementCounter = func(name string) { counter.New(name).Inc() } 92 childExitHook = func() {} 93 ) 94 95 // The sentinel function returns its address. The difference between 96 // this value as observed by calls in two different processes of the 97 // same executable tells us the relative offset of their text segments. 98 // 99 // It would be nice if SetCrashOutput took care of this as it's fiddly 100 // and likely to confuse every user at first. 101 func sentinel() uint64 { 102 return uint64(reflect.ValueOf(sentinel).Pointer()) 103 } 104 105 func writeSentinel(out io.Writer) { 106 fmt.Fprintf(out, "sentinel %x\n", sentinel()) 107 } 108 109 // telemetryCounterName parses a crash report produced by the Go 110 // runtime, extracts the stack of the first runnable goroutine, 111 // converts each line into telemetry form ("symbol:relative-line"), 112 // and returns this as the name of a counter. 113 func telemetryCounterName(crash []byte) (string, error) { 114 pcs, err := parseStackPCs(string(crash)) 115 if err != nil { 116 return "", err 117 } 118 119 // Limit the number of frames we request. 120 pcs = pcs[:min(len(pcs), 16)] 121 122 if len(pcs) == 0 { 123 // This can occur if all goroutines are idle, as when 124 // caught in a deadlock, or killed by an async signal 125 // while blocked. 126 // 127 // TODO(adonovan): consider how to report such 128 // situations. Reporting a goroutine in [sleep] or 129 // [select] state could be quite confusing without 130 // further information about the nature of the crash, 131 // as the problem is not local to the code location. 132 // 133 // For now, we keep count of this situation so that we 134 // can access whether it needs a more involved solution. 135 return "crash/no-running-goroutine", nil 136 } 137 138 // This string appears at the start of all 139 // crashmonitor-generated counter names. 140 // 141 // It is tempting to expose this as a parameter of Start, but 142 // it is not without risk. What value should most programs 143 // provide? There's no point giving the name of the executable 144 // as this is already recorded by telemetry. What if the 145 // application runs in multiple modes? Then it might be useful 146 // to record the mode. The problem is that an application with 147 // multiple modes probably doesn't know its mode by line 1 of 148 // main.main: it might require flag or argument parsing, or 149 // even validation of an environment variable, and we really 150 // want to steer users aware from any logic before Start. The 151 // flags and arguments will be wrong in the child process, and 152 // every extra conditional branch creates a risk that the 153 // recursively executed child program will behave not like the 154 // monitor but like the application. If the child process 155 // exits before calling Start, then the parent application 156 // will not have a monitor, and its crash reports will be 157 // discarded (written in to a pipe that is never read). 158 // 159 // So for now, we use this constant string. 160 const prefix = "crash/crash" 161 return counter.EncodeStack(pcs, prefix), nil 162 } 163 164 // parseStackPCs parses the parent process's program counters for the 165 // first running goroutine out of a GOTRACEBACK=system traceback, 166 // adjusting them so that they are valid for the child process's text 167 // segment. 168 // 169 // This function returns only program counter values, ensuring that 170 // there is no possibility of strings from the crash report (which may 171 // contain PII) leaking into the telemetry system. 172 func parseStackPCs(crash string) ([]uintptr, error) { 173 // getPC parses the PC out of a line of the form: 174 // \tFILE:LINE +0xRELPC sp=... fp=... pc=... 175 getPC := func(line string) (uint64, error) { 176 _, pcstr, ok := strings.Cut(line, " pc=") // e.g. pc=0x%x 177 if !ok { 178 return 0, fmt.Errorf("no pc= for stack frame: %s", line) 179 } 180 return strconv.ParseUint(pcstr, 0, 64) // 0 => allow 0x prefix 181 } 182 183 var ( 184 pcs []uintptr 185 parentSentinel uint64 186 childSentinel = sentinel() 187 on = false // are we in the first running goroutine? 188 lines = strings.Split(crash, "\n") 189 ) 190 for i := 0; i < len(lines); i++ { 191 line := lines[i] 192 193 // Read sentinel value. 194 if parentSentinel == 0 && strings.HasPrefix(line, "sentinel ") { 195 _, err := fmt.Sscanf(line, "sentinel %x", &parentSentinel) 196 if err != nil { 197 return nil, fmt.Errorf("can't read sentinel line") 198 } 199 continue 200 } 201 202 // Search for "goroutine GID [STATUS]" 203 if !on { 204 if strings.HasPrefix(line, "goroutine ") && 205 strings.Contains(line, " [running]:") { 206 on = true 207 208 if parentSentinel == 0 { 209 return nil, fmt.Errorf("no sentinel value in crash report") 210 } 211 } 212 continue 213 } 214 215 // A blank line marks end of a goroutine stack. 216 if line == "" { 217 break 218 } 219 220 // Skip the final "created by SYMBOL in goroutine GID" part. 221 if strings.HasPrefix(line, "created by ") { 222 break 223 } 224 225 // Expect a pair of lines: 226 // SYMBOL(ARGS) 227 // \tFILE:LINE +0xRELPC sp=0x%x fp=0x%x pc=0x%x 228 // Note: SYMBOL may contain parens "pkg.(*T).method" 229 // The RELPC is sometimes missing. 230 231 // Skip the symbol(args) line. 232 i++ 233 if i == len(lines) { 234 break 235 } 236 line = lines[i] 237 238 // Parse the PC, and correct for the parent and child's 239 // different mappings of the text section. 240 pc, err := getPC(line) 241 if err != nil { 242 // Inlined frame, perhaps; skip it. 243 continue 244 } 245 pcs = append(pcs, uintptr(pc-parentSentinel+childSentinel)) 246 } 247 return pcs, nil 248 } 249 250 func min(x, y int) int { 251 if x < y { 252 return x 253 } else { 254 return y 255 } 256 } 257