1 // Copyright 2024 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package telemetry 6 7 import ( 8 "fmt" 9 "log" 10 "os" 11 "os/exec" 12 "path/filepath" 13 "sync" 14 "time" 15 16 "golang.org/x/sync/errgroup" 17 "golang.org/x/telemetry/counter" 18 "golang.org/x/telemetry/internal/crashmonitor" 19 "golang.org/x/telemetry/internal/telemetry" 20 "golang.org/x/telemetry/internal/upload" 21 ) 22 23 // Config controls the behavior of [Start]. 24 type Config struct { 25 // ReportCrashes, if set, will enable crash reporting. 26 // ReportCrashes uses the [debug.SetCrashOutput] mechanism, which is a 27 // process-wide resource. 28 // Do not make other calls to that function within your application. 29 // ReportCrashes is a non-functional unless the program is built with go1.23+. 30 ReportCrashes bool 31 32 // Upload causes this program to periodically upload approved counters 33 // from the local telemetry database to telemetry.go.dev. 34 // 35 // This option has no effect unless the user has given consent 36 // to enable data collection, for example by running 37 // cmd/gotelemetry or affirming the gopls dialog. 38 // 39 // (This feature is expected to be used only by gopls. 40 // Longer term, the go command may become the sole program 41 // responsible for uploading.) 42 Upload bool 43 44 // TelemetryDir, if set, will specify an alternate telemetry 45 // directory to write data to. If not set, it uses the default 46 // directory. 47 // This field is intended to be used for isolating testing environments. 48 TelemetryDir string 49 50 // UploadStartTime, if set, overrides the time used as the upload start time, 51 // which is the time used by the upload logic to determine whether counter 52 // file data should be uploaded. Only counter files that have expired before 53 // the start time are considered for upload. 54 // 55 // This field can be used to simulate a future upload that collects recently 56 // modified counters. 57 UploadStartTime time.Time 58 59 // UploadURL, if set, overrides the URL used to receive uploaded reports. If 60 // unset, this URL defaults to https://telemetry.go.dev/upload. 61 UploadURL string 62 } 63 64 // Start initializes telemetry using the specified configuration. 65 // 66 // Start opens the local telemetry database so that counter increment 67 // operations are durably recorded in the local file system. 68 // 69 // If [Config.Upload] is set, and the user has opted in to telemetry 70 // uploading, this process may attempt to upload approved counters 71 // to telemetry.go.dev. 72 // 73 // If [Config.ReportCrashes] is set, any fatal crash will be 74 // recorded by incrementing a counter named for the stack of the 75 // first running goroutine in the traceback. 76 // 77 // If either of these flags is set, Start re-executes the current 78 // executable as a child process, in a special mode in which it 79 // acts as a telemetry sidecar for the parent process (the application). 80 // In that mode, the call to Start will never return, so Start must 81 // be called immediately within main, even before such things as 82 // inspecting the command line. The application should avoid expensive 83 // steps or external side effects in init functions, as they will 84 // be executed twice (parent and child). 85 // 86 // Start returns a StartResult, which may be awaited via [StartResult.Wait] to 87 // wait for all work done by Start to complete. 88 func Start(config Config) *StartResult { 89 switch v := os.Getenv(telemetryChildVar); v { 90 case "": 91 // The subprocess started by parent has GO_TELEMETRY_CHILD=1. 92 return parent(config) 93 case "1": 94 child(config) // child will exit the process when it's done. 95 case "2": 96 // Do nothing: this was executed directly or indirectly by a child. 97 default: 98 log.Fatalf("unexpected value for %q: %q", telemetryChildVar, v) 99 } 100 101 return &StartResult{} 102 } 103 104 // MaybeChild executes the telemetry child logic if the calling program is 105 // the telemetry child process, and does nothing otherwise. It is meant to be 106 // called as the first thing in a program that uses telemetry.Start but cannot 107 // call telemetry.Start immediately when it starts. 108 func MaybeChild(config Config) { 109 if v := os.Getenv(telemetryChildVar); v == "1" { 110 child(config) // child will exit the process when it's done. 111 } 112 // other values of the telemetryChildVar environment variable 113 // will be handled by telemetry.Start. 114 } 115 116 // A StartResult is a handle to the result of a call to [Start]. Call 117 // [StartResult.Wait] to wait for the completion of all work done on behalf of 118 // Start. 119 type StartResult struct { 120 wg sync.WaitGroup 121 } 122 123 // Wait waits for the completion of all work initiated by [Start]. 124 func (res *StartResult) Wait() { 125 if res == nil { 126 return 127 } 128 res.wg.Wait() 129 } 130 131 var daemonize = func(cmd *exec.Cmd) {} 132 133 // If telemetryChildVar is set to "1" in the environment, this is the telemetry 134 // child. 135 // 136 // If telemetryChildVar is set to "2", this is a child of the child, and no 137 // further forking should occur. 138 const telemetryChildVar = "GO_TELEMETRY_CHILD" 139 140 // If telemetryUploadVar is set to "1" in the environment, the upload token has been 141 // acquired by the parent, and the child should attempt an upload. 142 const telemetryUploadVar = "GO_TELEMETRY_CHILD_UPLOAD" 143 144 func parent(config Config) *StartResult { 145 if config.TelemetryDir != "" { 146 telemetry.Default = telemetry.NewDir(config.TelemetryDir) 147 } 148 result := new(StartResult) 149 150 mode, _ := telemetry.Default.Mode() 151 if mode == "off" { 152 // Telemetry is turned off. Crash reporting doesn't work without telemetry 153 // at least set to "local". The upload process runs in both "on" and "local" modes. 154 // In local mode the upload process builds local reports but does not do the upload. 155 return result 156 } 157 158 counter.Open() 159 160 if _, err := os.Stat(telemetry.Default.LocalDir()); err != nil { 161 // There was a problem statting LocalDir, which is needed for both 162 // crash monitoring and counter uploading. Most likely, there was an 163 // error creating telemetry.LocalDir in the counter.Open call above. 164 // Don't start the child. 165 return result 166 } 167 168 childShouldUpload := config.Upload && acquireUploadToken() 169 reportCrashes := config.ReportCrashes && crashmonitor.Supported() 170 171 if reportCrashes || childShouldUpload { 172 startChild(reportCrashes, childShouldUpload, result) 173 } 174 175 return result 176 } 177 178 func startChild(reportCrashes, upload bool, result *StartResult) { 179 // This process is the application (parent). 180 // Fork+exec the telemetry child. 181 exe, err := os.Executable() 182 if err != nil { 183 // There was an error getting os.Executable. It's possible 184 // for this to happen on AIX if os.Args[0] is not an absolute 185 // path and we can't find os.Args[0] in PATH. 186 log.Printf("failed to start telemetry sidecar: os.Executable: %v", err) 187 return 188 } 189 cmd := exec.Command(exe, "** telemetry **") // this unused arg is just for ps(1) 190 daemonize(cmd) 191 cmd.Env = append(os.Environ(), telemetryChildVar+"=1") 192 if upload { 193 cmd.Env = append(cmd.Env, telemetryUploadVar+"=1") 194 } 195 cmd.Dir = telemetry.Default.LocalDir() 196 197 // The child process must write to a log file, not 198 // the stderr file it inherited from the parent, as 199 // the child may outlive the parent but should not prolong 200 // the life of any pipes created (by the grandparent) 201 // to gather the output of the parent. 202 // 203 // By default, we discard the child process's stderr, 204 // but in line with the uploader, log to a file in debug 205 // only if that directory was created by the user. 206 fd, err := os.Stat(telemetry.Default.DebugDir()) 207 if err != nil { 208 if !os.IsNotExist(err) { 209 log.Printf("failed to stat debug directory: %v", err) 210 return 211 } 212 } else if fd.IsDir() { 213 // local/debug exists and is a directory. Set stderr to a log file path 214 // in local/debug. 215 childLogPath := filepath.Join(telemetry.Default.DebugDir(), "sidecar.log") 216 childLog, err := os.OpenFile(childLogPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0600) 217 if err != nil { 218 log.Printf("opening sidecar log file for child: %v", err) 219 return 220 } 221 defer childLog.Close() 222 cmd.Stderr = childLog 223 } 224 225 var crashOutputFile *os.File 226 if reportCrashes { 227 pipe, err := cmd.StdinPipe() 228 if err != nil { 229 log.Printf("StdinPipe: %v", err) 230 return 231 } 232 233 crashOutputFile = pipe.(*os.File) // (this conversion is safe) 234 } 235 236 if err := cmd.Start(); err != nil { 237 // The child couldn't be started. Log the failure. 238 log.Printf("can't start telemetry child process: %v", err) 239 return 240 } 241 if reportCrashes { 242 crashmonitor.Parent(crashOutputFile) 243 } 244 result.wg.Add(1) 245 go func() { 246 cmd.Wait() // Release resources if cmd happens not to outlive this process. 247 result.wg.Done() 248 }() 249 } 250 251 func child(config Config) { 252 log.SetPrefix(fmt.Sprintf("telemetry-sidecar (pid %v): ", os.Getpid())) 253 254 if config.TelemetryDir != "" { 255 telemetry.Default = telemetry.NewDir(config.TelemetryDir) 256 } 257 258 // golang/go#67211: be sure to set telemetryChildVar before running the 259 // child, because the child itself invokes the go command to download the 260 // upload config. If the telemetryChildVar variable is still set to "1", 261 // that delegated go command may think that it is itself a telemetry 262 // child. 263 // 264 // On the other hand, if telemetryChildVar were simply unset, then the 265 // delegated go commands would fork themselves recursively. Short-circuit 266 // this recursion. 267 os.Setenv(telemetryChildVar, "2") 268 upload := os.Getenv(telemetryUploadVar) == "1" 269 270 reportCrashes := config.ReportCrashes && crashmonitor.Supported() 271 uploadStartTime := config.UploadStartTime 272 uploadURL := config.UploadURL 273 274 // Start crashmonitoring and uploading depending on what's requested 275 // and wait for the longer running child to complete before exiting: 276 // if we collected a crash before the upload finished, wait for the 277 // upload to finish before exiting 278 var g errgroup.Group 279 280 if reportCrashes { 281 g.Go(func() error { 282 crashmonitor.Child() 283 return nil 284 }) 285 } 286 if upload { 287 g.Go(func() error { 288 uploaderChild(uploadStartTime, uploadURL) 289 return nil 290 }) 291 } 292 g.Wait() 293 294 os.Exit(0) 295 } 296 297 func uploaderChild(asof time.Time, uploadURL string) { 298 if err := upload.Run(upload.RunConfig{ 299 UploadURL: uploadURL, 300 LogWriter: os.Stderr, 301 StartTime: asof, 302 }); err != nil { 303 log.Printf("upload failed: %v", err) 304 } 305 } 306 307 // acquireUploadToken acquires a token permitting the caller to upload. 308 // To limit the frequency of uploads, only one token is issue per 309 // machine per time period. 310 // The boolean indicates whether the token was acquired. 311 func acquireUploadToken() bool { 312 if telemetry.Default.LocalDir() == "" { 313 // The telemetry dir wasn't initialized properly, probably because 314 // os.UserConfigDir did not complete successfully. In that case 315 // there are no counters to upload, so we should just do nothing. 316 return false 317 } 318 tokenfile := filepath.Join(telemetry.Default.LocalDir(), "upload.token") 319 const period = 24 * time.Hour 320 321 // A process acquires a token by successfully creating a 322 // well-known file. If the file already exists and has an 323 // mtime age less then than the period, the process does 324 // not acquire the token. If the file is older than the 325 // period, the process is allowed to remove the file and 326 // try to re-create it. 327 fi, err := os.Stat(tokenfile) 328 if err == nil { 329 if time.Since(fi.ModTime()) < period { 330 return false 331 } 332 // There's a possible race here where two processes check the 333 // token file and see that it's older than the period, then the 334 // first one removes it and creates another, and then a second one 335 // removes the newly created file and creates yet another 336 // file. Then both processes would act as though they had the token. 337 // This is very rare, but it's also okay because we're only grabbing 338 // the token to do rate limiting, not for correctness. 339 _ = os.Remove(tokenfile) 340 } else if !os.IsNotExist(err) { 341 log.Printf("error acquiring upload taken: statting token file: %v", err) 342 return false 343 } 344 345 f, err := os.OpenFile(tokenfile, os.O_CREATE|os.O_EXCL, 0666) 346 if err != nil { 347 if os.IsExist(err) { 348 return false 349 } 350 log.Printf("error acquiring upload token: creating token file: %v", err) 351 return false 352 } 353 _ = f.Close() 354 return true 355 } 356