...

Source file src/internal/fuzz/fuzz.go

Documentation: internal/fuzz

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package fuzz provides common fuzzing functionality for tests built with
     6  // "go test" and for programs that use fuzzing functionality in the testing
     7  // package.
     8  package fuzz
     9  
    10  import (
    11  	"bytes"
    12  	"context"
    13  	"crypto/sha256"
    14  	"errors"
    15  	"fmt"
    16  	"internal/godebug"
    17  	"io"
    18  	"math/bits"
    19  	"os"
    20  	"path/filepath"
    21  	"reflect"
    22  	"runtime"
    23  	"strings"
    24  	"time"
    25  )
    26  
// CoordinateFuzzingOpts is a set of arguments for CoordinateFuzzing.
// The zero value is valid for each field unless specified otherwise.
type CoordinateFuzzingOpts struct {
	// Log is a writer for logging progress messages and warnings.
	// If nil, io.Discard will be used instead.
	Log io.Writer

	// Timeout is the amount of wall clock time to spend fuzzing after the corpus
	// has loaded. If zero, there will be no time limit.
	Timeout time.Duration

	// Limit is the number of random values to generate and test. If zero,
	// there will be no limit on the number of generated values.
	Limit int64

	// MinimizeTimeout is the amount of wall clock time to spend minimizing
	// after discovering a crasher. If zero, there will be no time limit. If
	// MinimizeTimeout and MinimizeLimit are both zero, then minimization will
	// be disabled.
	MinimizeTimeout time.Duration

	// MinimizeLimit is the maximum number of calls to the fuzz function to be
	// made while minimizing after finding a crash. If zero, there will be no
	// limit. Calls to the fuzz function made when minimizing also count toward
	// Limit. If MinimizeTimeout and MinimizeLimit are both zero, then
	// minimization will be disabled.
	MinimizeLimit int64

	// Parallel is the number of worker processes to run in parallel. If zero,
	// CoordinateFuzzing will run GOMAXPROCS workers. When Limit is positive
	// and smaller than Parallel, CoordinateFuzzing starts only Limit workers.
	Parallel int

	// Seed is a list of seed values added by the fuzz target with testing.F.Add
	// and in testdata.
	Seed []CorpusEntry

	// Types is the list of types which make up a corpus entry.
	// Types must be set and must match values in Seed.
	Types []reflect.Type

	// CorpusDir is a directory where files containing values that crash the
	// code being tested may be written. CorpusDir must be set.
	CorpusDir string

	// CacheDir is a directory containing additional "interesting" values.
	// The fuzzer may derive new values from these, and may write new values here.
	CacheDir string
}
    75  
    76  // CoordinateFuzzing creates several worker processes and communicates with
    77  // them to test random inputs that could trigger crashes and expose bugs.
    78  // The worker processes run the same binary in the same directory with the
    79  // same environment variables as the coordinator process. Workers also run
    80  // with the same arguments as the coordinator, except with the -test.fuzzworker
    81  // flag prepended to the argument list.
    82  //
    83  // If a crash occurs, the function will return an error containing information
    84  // about the crash, which can be reported to the user.
    85  func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err error) {
    86  	if err := ctx.Err(); err != nil {
    87  		return err
    88  	}
    89  	if opts.Log == nil {
    90  		opts.Log = io.Discard
    91  	}
    92  	if opts.Parallel == 0 {
    93  		opts.Parallel = runtime.GOMAXPROCS(0)
    94  	}
    95  	if opts.Limit > 0 && int64(opts.Parallel) > opts.Limit {
    96  		// Don't start more workers than we need.
    97  		opts.Parallel = int(opts.Limit)
    98  	}
    99  
   100  	c, err := newCoordinator(opts)
   101  	if err != nil {
   102  		return err
   103  	}
   104  
   105  	if opts.Timeout > 0 {
   106  		var cancel func()
   107  		ctx, cancel = context.WithTimeout(ctx, opts.Timeout)
   108  		defer cancel()
   109  	}
   110  
   111  	// fuzzCtx is used to stop workers, for example, after finding a crasher.
   112  	fuzzCtx, cancelWorkers := context.WithCancel(ctx)
   113  	defer cancelWorkers()
   114  	doneC := ctx.Done()
   115  
   116  	// stop is called when a worker encounters a fatal error.
   117  	var fuzzErr error
   118  	stopping := false
   119  	stop := func(err error) {
   120  		if shouldPrintDebugInfo() {
   121  			_, file, line, ok := runtime.Caller(1)
   122  			if ok {
   123  				c.debugLogf("stop called at %s:%d. stopping: %t", file, line, stopping)
   124  			} else {
   125  				c.debugLogf("stop called at unknown. stopping: %t", stopping)
   126  			}
   127  		}
   128  
   129  		if err == fuzzCtx.Err() || isInterruptError(err) {
   130  			// Suppress cancellation errors and terminations due to SIGINT.
   131  			// The messages are not helpful since either the user triggered the error
   132  			// (with ^C) or another more helpful message will be printed (a crasher).
   133  			err = nil
   134  		}
   135  		if err != nil && (fuzzErr == nil || fuzzErr == ctx.Err()) {
   136  			fuzzErr = err
   137  		}
   138  		if stopping {
   139  			return
   140  		}
   141  		stopping = true
   142  		cancelWorkers()
   143  		doneC = nil
   144  	}
   145  
   146  	// Ensure that any crash we find is written to the corpus, even if an error
   147  	// or interruption occurs while minimizing it.
   148  	crashWritten := false
   149  	defer func() {
   150  		if c.crashMinimizing == nil || crashWritten {
   151  			return
   152  		}
   153  		werr := writeToCorpus(&c.crashMinimizing.entry, opts.CorpusDir)
   154  		if werr != nil {
   155  			err = fmt.Errorf("%w\n%v", err, werr)
   156  			return
   157  		}
   158  		if err == nil {
   159  			err = &crashError{
   160  				path: c.crashMinimizing.entry.Path,
   161  				err:  errors.New(c.crashMinimizing.crasherMsg),
   162  			}
   163  		}
   164  	}()
   165  
   166  	// Start workers.
   167  	// TODO(jayconrod): do we want to support fuzzing different binaries?
   168  	dir := "" // same as self
   169  	binPath := os.Args[0]
   170  	args := append([]string{"-test.fuzzworker"}, os.Args[1:]...)
   171  	env := os.Environ() // same as self
   172  
   173  	errC := make(chan error)
   174  	workers := make([]*worker, opts.Parallel)
   175  	for i := range workers {
   176  		var err error
   177  		workers[i], err = newWorker(c, dir, binPath, args, env)
   178  		if err != nil {
   179  			return err
   180  		}
   181  	}
   182  	for i := range workers {
   183  		w := workers[i]
   184  		go func() {
   185  			err := w.coordinate(fuzzCtx)
   186  			if fuzzCtx.Err() != nil || isInterruptError(err) {
   187  				err = nil
   188  			}
   189  			cleanErr := w.cleanup()
   190  			if err == nil {
   191  				err = cleanErr
   192  			}
   193  			errC <- err
   194  		}()
   195  	}
   196  
   197  	// Main event loop.
   198  	// Do not return until all workers have terminated. We avoid a deadlock by
   199  	// receiving messages from workers even after ctx is canceled.
   200  	activeWorkers := len(workers)
   201  	statTicker := time.NewTicker(3 * time.Second)
   202  	defer statTicker.Stop()
   203  	defer c.logStats()
   204  
   205  	c.logStats()
   206  	for {
   207  		// If there is an execution limit, and we've reached it, stop.
   208  		if c.opts.Limit > 0 && c.count >= c.opts.Limit {
   209  			stop(nil)
   210  		}
   211  
   212  		var inputC chan fuzzInput
   213  		input, ok := c.peekInput()
   214  		if ok && c.crashMinimizing == nil && !stopping {
   215  			inputC = c.inputC
   216  		}
   217  
   218  		var minimizeC chan fuzzMinimizeInput
   219  		minimizeInput, ok := c.peekMinimizeInput()
   220  		if ok && !stopping {
   221  			minimizeC = c.minimizeC
   222  		}
   223  
   224  		select {
   225  		case <-doneC:
   226  			// Interrupted, canceled, or timed out.
   227  			// stop sets doneC to nil, so we don't busy wait here.
   228  			stop(ctx.Err())
   229  
   230  		case err := <-errC:
   231  			// A worker terminated, possibly after encountering a fatal error.
   232  			stop(err)
   233  			activeWorkers--
   234  			if activeWorkers == 0 {
   235  				return fuzzErr
   236  			}
   237  
   238  		case result := <-c.resultC:
   239  			// Received response from worker.
   240  			if stopping {
   241  				break
   242  			}
   243  			c.updateStats(result)
   244  
   245  			if result.crasherMsg != "" {
   246  				if c.warmupRun() && result.entry.IsSeed {
   247  					target := filepath.Base(c.opts.CorpusDir)
   248  					fmt.Fprintf(c.opts.Log, "failure while testing seed corpus entry: %s/%s\n", target, testName(result.entry.Parent))
   249  					stop(errors.New(result.crasherMsg))
   250  					break
   251  				}
   252  				if c.canMinimize() && result.canMinimize {
   253  					if c.crashMinimizing != nil {
   254  						// This crash is not minimized, and another crash is being minimized.
   255  						// Ignore this one and wait for the other one to finish.
   256  						if shouldPrintDebugInfo() {
   257  							c.debugLogf("found unminimized crasher, skipping in favor of minimizable crasher")
   258  						}
   259  						break
   260  					}
   261  					// Found a crasher but haven't yet attempted to minimize it.
   262  					// Send it back to a worker for minimization. Disable inputC so
   263  					// other workers don't continue fuzzing.
   264  					c.crashMinimizing = &result
   265  					fmt.Fprintf(c.opts.Log, "fuzz: minimizing %d-byte failing input file\n", len(result.entry.Data))
   266  					c.queueForMinimization(result, nil)
   267  				} else if !crashWritten {
   268  					// Found a crasher that's either minimized or not minimizable.
   269  					// Write to corpus and stop.
   270  					err := writeToCorpus(&result.entry, opts.CorpusDir)
   271  					if err == nil {
   272  						crashWritten = true
   273  						err = &crashError{
   274  							path: result.entry.Path,
   275  							err:  errors.New(result.crasherMsg),
   276  						}
   277  					}
   278  					if shouldPrintDebugInfo() {
   279  						c.debugLogf(
   280  							"found crasher, id: %s, parent: %s, gen: %d, size: %d, exec time: %s",
   281  							result.entry.Path,
   282  							result.entry.Parent,
   283  							result.entry.Generation,
   284  							len(result.entry.Data),
   285  							result.entryDuration,
   286  						)
   287  					}
   288  					stop(err)
   289  				}
   290  			} else if result.coverageData != nil {
   291  				if c.warmupRun() {
   292  					if shouldPrintDebugInfo() {
   293  						c.debugLogf(
   294  							"processed an initial input, id: %s, new bits: %d, size: %d, exec time: %s",
   295  							result.entry.Parent,
   296  							countBits(diffCoverage(c.coverageMask, result.coverageData)),
   297  							len(result.entry.Data),
   298  							result.entryDuration,
   299  						)
   300  					}
   301  					c.updateCoverage(result.coverageData)
   302  					c.warmupInputLeft--
   303  					if c.warmupInputLeft == 0 {
   304  						fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel)
   305  						if shouldPrintDebugInfo() {
   306  							c.debugLogf(
   307  								"finished processing input corpus, entries: %d, initial coverage bits: %d",
   308  								len(c.corpus.entries),
   309  								countBits(c.coverageMask),
   310  							)
   311  						}
   312  					}
   313  				} else if keepCoverage := diffCoverage(c.coverageMask, result.coverageData); keepCoverage != nil {
   314  					// Found a value that expanded coverage.
   315  					// It's not a crasher, but we may want to add it to the on-disk
   316  					// corpus and prioritize it for future fuzzing.
   317  					// TODO(jayconrod, katiehockman): Prioritize fuzzing these
   318  					// values which expanded coverage, perhaps based on the
   319  					// number of new edges that this result expanded.
   320  					// TODO(jayconrod, katiehockman): Don't write a value that's already
   321  					// in the corpus.
   322  					if c.canMinimize() && result.canMinimize && c.crashMinimizing == nil {
   323  						// Send back to workers to find a smaller value that preserves
   324  						// at least one new coverage bit.
   325  						c.queueForMinimization(result, keepCoverage)
   326  					} else {
   327  						// Update the coordinator's coverage mask and save the value.
   328  						inputSize := len(result.entry.Data)
   329  						entryNew, err := c.addCorpusEntries(true, result.entry)
   330  						if err != nil {
   331  							stop(err)
   332  							break
   333  						}
   334  						if !entryNew {
   335  							if shouldPrintDebugInfo() {
   336  								c.debugLogf(
   337  									"ignoring duplicate input which increased coverage, id: %s",
   338  									result.entry.Path,
   339  								)
   340  							}
   341  							break
   342  						}
   343  						c.updateCoverage(keepCoverage)
   344  						c.inputQueue.enqueue(result.entry)
   345  						c.interestingCount++
   346  						if shouldPrintDebugInfo() {
   347  							c.debugLogf(
   348  								"new interesting input, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s",
   349  								result.entry.Path,
   350  								result.entry.Parent,
   351  								result.entry.Generation,
   352  								countBits(keepCoverage),
   353  								countBits(c.coverageMask),
   354  								inputSize,
   355  								result.entryDuration,
   356  							)
   357  						}
   358  					}
   359  				} else {
   360  					if shouldPrintDebugInfo() {
   361  						c.debugLogf(
   362  							"worker reported interesting input that doesn't expand coverage, id: %s, parent: %s, canMinimize: %t",
   363  							result.entry.Path,
   364  							result.entry.Parent,
   365  							result.canMinimize,
   366  						)
   367  					}
   368  				}
   369  			} else if c.warmupRun() {
   370  				// No error or coverage data was reported for this input during
   371  				// warmup, so continue processing results.
   372  				c.warmupInputLeft--
   373  				if c.warmupInputLeft == 0 {
   374  					fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel)
   375  					if shouldPrintDebugInfo() {
   376  						c.debugLogf(
   377  							"finished testing-only phase, entries: %d",
   378  							len(c.corpus.entries),
   379  						)
   380  					}
   381  				}
   382  			}
   383  
   384  		case inputC <- input:
   385  			// Sent the next input to a worker.
   386  			c.sentInput(input)
   387  
   388  		case minimizeC <- minimizeInput:
   389  			// Sent the next input for minimization to a worker.
   390  			c.sentMinimizeInput(minimizeInput)
   391  
   392  		case <-statTicker.C:
   393  			c.logStats()
   394  		}
   395  	}
   396  
   397  	// TODO(jayconrod,katiehockman): if a crasher can't be written to the corpus,
   398  	// write to the cache instead.
   399  }
   400  
   401  // crashError wraps a crasher written to the seed corpus. It saves the name
   402  // of the file where the input causing the crasher was saved. The testing
   403  // framework uses this to report a command to re-run that specific input.
   404  type crashError struct {
   405  	path string
   406  	err  error
   407  }
   408  
   409  func (e *crashError) Error() string {
   410  	return e.err.Error()
   411  }
   412  
   413  func (e *crashError) Unwrap() error {
   414  	return e.err
   415  }
   416  
   417  func (e *crashError) CrashPath() string {
   418  	return e.path
   419  }
   420  
// corpus is the in-memory set of corpus entries held by the coordinator.
// entries lists the entries in the order they were added; hashes is keyed by
// the SHA-256 digest of each entry's data and is used by addCorpusEntries to
// skip duplicates.
type corpus struct {
	entries []CorpusEntry
	hashes  map[[sha256.Size]byte]bool
}
   425  
   426  // addCorpusEntries adds entries to the corpus, and optionally writes the entries
   427  // to the cache directory. If an entry is already in the corpus it is skipped. If
   428  // all of the entries are unique, addCorpusEntries returns true and a nil error,
   429  // if at least one of the entries was a duplicate, it returns false and a nil error.
   430  func (c *coordinator) addCorpusEntries(addToCache bool, entries ...CorpusEntry) (bool, error) {
   431  	noDupes := true
   432  	for _, e := range entries {
   433  		data, err := corpusEntryData(e)
   434  		if err != nil {
   435  			return false, err
   436  		}
   437  		h := sha256.Sum256(data)
   438  		if c.corpus.hashes[h] {
   439  			noDupes = false
   440  			continue
   441  		}
   442  		if addToCache {
   443  			if err := writeToCorpus(&e, c.opts.CacheDir); err != nil {
   444  				return false, err
   445  			}
   446  			// For entries written to disk, we don't hold onto the bytes,
   447  			// since the corpus would consume a significant amount of
   448  			// memory.
   449  			e.Data = nil
   450  		}
   451  		c.corpus.hashes[h] = true
   452  		c.corpus.entries = append(c.corpus.entries, e)
   453  	}
   454  	return noDupes, nil
   455  }
   456  
// CorpusEntry represents an individual input for fuzzing.
//
// We must use an equivalent type in the testing and testing/internal/testdeps
// packages, but testing can't import this package directly, and we don't want
// to export this type from testing. Instead, we use the same struct type and
// use a type alias (not a defined type) for convenience.
type CorpusEntry = struct {
	// NOTE(review): Parent presumably identifies the entry this one was
	// derived from; it is what the coordinator prints as "parent" in debug
	// logs. Confirm against the code that populates it.
	Parent string

	// Path is the path of the corpus file, if the entry was loaded from disk.
	// For other entries, including seed values provided by f.Add, Path is the
	// name of the test, e.g. seed#0 or its hash.
	Path string

	// Data is the raw input data. Data should only be populated for seed
	// values. For on-disk corpus files, Data will be nil, as it will be loaded
	// from disk using Path.
	Data []byte

	// Values is the unmarshaled values from a corpus file.
	Values []any

	// NOTE(review): Generation is printed as "gen" in debug logs; presumably
	// it counts how many derivation steps separate this entry from its
	// original ancestor. Confirm against the code that populates it.
	Generation int

	// IsSeed indicates whether this entry is part of the seed corpus.
	IsSeed bool
}
   484  
   485  // corpusEntryData returns the raw input bytes, either from the data struct
   486  // field, or from disk.
   487  func corpusEntryData(ce CorpusEntry) ([]byte, error) {
   488  	if ce.Data != nil {
   489  		return ce.Data, nil
   490  	}
   491  
   492  	return os.ReadFile(ce.Path)
   493  }
   494  
// fuzzInput is the message type carried by the coordinator's inputC channel:
// one value for a worker to test, together with the budget for doing so.
type fuzzInput struct {
	// entry is the value to test initially. The worker will randomly mutate
	// values from this starting point.
	entry CorpusEntry

	// timeout is the time to spend fuzzing variations of this input,
	// not including starting or cleaning up.
	timeout time.Duration

	// limit is the maximum number of calls to the fuzz function the worker may
	// make. The worker may make fewer calls, for example, if it finds an
	// error early. If limit is zero, there is no limit on calls to the
	// fuzz function.
	limit int64

	// warmup indicates whether this is a warmup input before fuzzing begins. If
	// true, the input should not be fuzzed.
	warmup bool

	// coverageData reflects the coordinator's current coverageMask.
	coverageData []byte
}
   517  
// fuzzResult is the message type carried by the coordinator's resultC
// channel: what a worker reports back after testing or minimizing an input.
type fuzzResult struct {
	// entry is an interesting value or a crasher.
	entry CorpusEntry

	// crasherMsg is an error message from a crash. It's "" if no crash was found.
	crasherMsg string

	// canMinimize is true if the worker should attempt to minimize this result.
	// It may be false because an attempt has already been made.
	canMinimize bool

	// coverageData is set if the worker found new coverage.
	coverageData []byte

	// limit is the number of values the coordinator asked the worker
	// to test. 0 if there was no limit.
	limit int64

	// count is the number of values the worker actually tested.
	count int64

	// totalDuration is the time the worker spent testing inputs.
	totalDuration time.Duration

	// entryDuration is the time the worker spent executing an interesting
	// result.
	entryDuration time.Duration
}
   545  
// fuzzMinimizeInput is the message type carried by the coordinator's
// minimizeC channel: one value for a worker to minimize, together with the
// budget for doing so.
type fuzzMinimizeInput struct {
	// entry is an interesting value or crasher to minimize.
	entry CorpusEntry

	// crasherMsg is an error message from a crash. It's "" if no crash was found.
	// If set, the worker will attempt to find a smaller input that also produces
	// an error, though not necessarily the same error.
	crasherMsg string

	// limit is the maximum number of calls to the fuzz function the worker may
	// make. The worker may make fewer calls, for example, if it can't reproduce
	// an error. If limit is zero, there is no limit on calls to the fuzz function.
	limit int64

	// timeout is the time to spend minimizing this input.
	// A zero timeout means no limit.
	timeout time.Duration

	// keepCoverage is a set of coverage bits that entry found that were not in
	// the coordinator's combined set. When minimizing, the worker should find an
	// input that preserves at least one of these bits. keepCoverage is nil for
	// crashing inputs.
	keepCoverage []byte
}
   570  
// coordinator holds channels that workers can use to communicate with
// the coordinator, along with the bookkeeping state (counters, corpus,
// queues and coverage mask) that the coordinator maintains while fuzzing.
type coordinator struct {
	// opts holds the options CoordinateFuzzing was called with, after
	// defaults have been applied.
	opts CoordinateFuzzingOpts

	// startTime is the time we started the workers after loading the corpus.
	// Used for logging.
	startTime time.Time

	// inputC is sent values to fuzz by the coordinator. Any worker may receive
	// values from this channel. Workers send results to resultC.
	inputC chan fuzzInput

	// minimizeC is sent values to minimize by the coordinator. Any worker may
	// receive values from this channel. Workers send results to resultC.
	minimizeC chan fuzzMinimizeInput

	// resultC is sent results of fuzzing by workers. The coordinator
	// receives these. Multiple types of messages are allowed.
	resultC chan fuzzResult

	// count is the number of values fuzzed so far.
	count int64

	// countLastLog is the number of values fuzzed when the output was last
	// logged.
	countLastLog int64

	// timeLastLog is the time at which the output was last logged.
	timeLastLog time.Time

	// interestingCount is the number of unique interesting values which have
	// been found this execution.
	interestingCount int

	// warmupInputCount is the count of all entries in the corpus which will
	// need to be received from workers to run once during warmup, but not fuzz.
	// This could be for coverage data, or only for the purposes of verifying
	// that the seed corpus doesn't have any crashers. See warmupRun.
	warmupInputCount int

	// warmupInputLeft is the number of entries in the corpus which still need
	// to be received from workers to run once during warmup, but not fuzz.
	// See warmupRun.
	warmupInputLeft int

	// duration is the time spent fuzzing inside workers, not counting time
	// starting up or tearing down.
	duration time.Duration

	// countWaiting is the number of fuzzing executions the coordinator is
	// waiting on workers to complete.
	countWaiting int64

	// corpus is a set of interesting values, including the seed corpus and
	// generated values that workers reported as interesting.
	corpus corpus

	// minimizationAllowed is true if one or more of the types of fuzz
	// function's parameters can be minimized.
	minimizationAllowed bool

	// inputQueue is a queue of inputs that workers should try fuzzing. This is
	// initially populated from the seed corpus and cached inputs. More inputs
	// may be added as new coverage is discovered.
	inputQueue queue

	// minimizeQueue is a queue of inputs that caused errors or exposed new
	// coverage. Workers should attempt to find smaller inputs that do the
	// same thing.
	minimizeQueue queue

	// crashMinimizing is the crash that is currently being minimized.
	crashMinimizing *fuzzResult

	// coverageMask aggregates coverage that was found for all inputs in the
	// corpus. Each byte represents a single basic execution block. Each set bit
	// within the byte indicates that an input has triggered that block at least
	// 1 << n times, where n is the position of the bit in the byte. For example, a
	// value of 12 indicates that separate inputs have triggered this block
	// between 4-7 times and 8-15 times.
	coverageMask []byte
}
   654  
   655  func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) {
   656  	// Make sure all the seed corpus has marshaled data.
   657  	for i := range opts.Seed {
   658  		if opts.Seed[i].Data == nil && opts.Seed[i].Values != nil {
   659  			opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...)
   660  		}
   661  	}
   662  	c := &coordinator{
   663  		opts:        opts,
   664  		startTime:   time.Now(),
   665  		inputC:      make(chan fuzzInput),
   666  		minimizeC:   make(chan fuzzMinimizeInput),
   667  		resultC:     make(chan fuzzResult),
   668  		timeLastLog: time.Now(),
   669  		corpus:      corpus{hashes: make(map[[sha256.Size]byte]bool)},
   670  	}
   671  	if err := c.readCache(); err != nil {
   672  		return nil, err
   673  	}
   674  	if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 {
   675  		for _, t := range opts.Types {
   676  			if isMinimizable(t) {
   677  				c.minimizationAllowed = true
   678  				break
   679  			}
   680  		}
   681  	}
   682  
   683  	covSize := len(coverage())
   684  	if covSize == 0 {
   685  		fmt.Fprintf(c.opts.Log, "warning: the test binary was not built with coverage instrumentation, so fuzzing will run without coverage guidance and may be inefficient\n")
   686  		// Even though a coverage-only run won't occur, we should still run all
   687  		// of the seed corpus to make sure there are no existing failures before
   688  		// we start fuzzing.
   689  		c.warmupInputCount = len(c.opts.Seed)
   690  		for _, e := range c.opts.Seed {
   691  			c.inputQueue.enqueue(e)
   692  		}
   693  	} else {
   694  		c.warmupInputCount = len(c.corpus.entries)
   695  		for _, e := range c.corpus.entries {
   696  			c.inputQueue.enqueue(e)
   697  		}
   698  		// Set c.coverageMask to a clean []byte full of zeros.
   699  		c.coverageMask = make([]byte, covSize)
   700  	}
   701  	c.warmupInputLeft = c.warmupInputCount
   702  
   703  	if len(c.corpus.entries) == 0 {
   704  		fmt.Fprintf(c.opts.Log, "warning: starting with empty corpus\n")
   705  		var vals []any
   706  		for _, t := range opts.Types {
   707  			vals = append(vals, zeroValue(t))
   708  		}
   709  		data := marshalCorpusFile(vals...)
   710  		h := sha256.Sum256(data)
   711  		name := fmt.Sprintf("%x", h[:4])
   712  		c.addCorpusEntries(false, CorpusEntry{Path: name, Data: data})
   713  	}
   714  
   715  	return c, nil
   716  }
   717  
   718  func (c *coordinator) updateStats(result fuzzResult) {
   719  	c.count += result.count
   720  	c.countWaiting -= result.limit
   721  	c.duration += result.totalDuration
   722  }
   723  
   724  func (c *coordinator) logStats() {
   725  	now := time.Now()
   726  	if c.warmupRun() {
   727  		runSoFar := c.warmupInputCount - c.warmupInputLeft
   728  		if coverageEnabled {
   729  			fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount)
   730  		} else {
   731  			fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount)
   732  		}
   733  	} else if c.crashMinimizing != nil {
   734  		fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, minimizing\n", c.elapsed())
   735  	} else {
   736  		rate := float64(c.count-c.countLastLog) / now.Sub(c.timeLastLog).Seconds()
   737  		if coverageEnabled {
   738  			total := c.warmupInputCount + c.interestingCount
   739  			fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec), new interesting: %d (total: %d)\n", c.elapsed(), c.count, rate, c.interestingCount, total)
   740  		} else {
   741  			fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec)\n", c.elapsed(), c.count, rate)
   742  		}
   743  	}
   744  	c.countLastLog = c.count
   745  	c.timeLastLog = now
   746  }
   747  
   748  // peekInput returns the next value that should be sent to workers.
   749  // If the number of executions is limited, the returned value includes
   750  // a limit for one worker. If there are no executions left, peekInput returns
   751  // a zero value and false.
   752  //
   753  // peekInput doesn't actually remove the input from the queue. The caller
   754  // must call sentInput after sending the input.
   755  //
   756  // If the input queue is empty and the coverage/testing-only run has completed,
   757  // queue refills it from the corpus.
   758  func (c *coordinator) peekInput() (fuzzInput, bool) {
   759  	if c.opts.Limit > 0 && c.count+c.countWaiting >= c.opts.Limit {
   760  		// Already making the maximum number of calls to the fuzz function.
   761  		// Don't send more inputs right now.
   762  		return fuzzInput{}, false
   763  	}
   764  	if c.inputQueue.len == 0 {
   765  		if c.warmupRun() {
   766  			// Wait for coverage/testing-only run to finish before sending more
   767  			// inputs.
   768  			return fuzzInput{}, false
   769  		}
   770  		c.refillInputQueue()
   771  	}
   772  
   773  	entry, ok := c.inputQueue.peek()
   774  	if !ok {
   775  		panic("input queue empty after refill")
   776  	}
   777  	input := fuzzInput{
   778  		entry:   entry.(CorpusEntry),
   779  		timeout: workerFuzzDuration,
   780  		warmup:  c.warmupRun(),
   781  	}
   782  	if c.coverageMask != nil {
   783  		input.coverageData = bytes.Clone(c.coverageMask)
   784  	}
   785  	if input.warmup {
   786  		// No fuzzing will occur, but it should count toward the limit set by
   787  		// -fuzztime.
   788  		input.limit = 1
   789  		return input, true
   790  	}
   791  
   792  	if c.opts.Limit > 0 {
   793  		input.limit = c.opts.Limit / int64(c.opts.Parallel)
   794  		if c.opts.Limit%int64(c.opts.Parallel) > 0 {
   795  			input.limit++
   796  		}
   797  		remaining := c.opts.Limit - c.count - c.countWaiting
   798  		if input.limit > remaining {
   799  			input.limit = remaining
   800  		}
   801  	}
   802  	return input, true
   803  }
   804  
   805  // sentInput updates internal counters after an input is sent to c.inputC.
   806  func (c *coordinator) sentInput(input fuzzInput) {
   807  	c.inputQueue.dequeue()
   808  	c.countWaiting += input.limit
   809  }
   810  
   811  // refillInputQueue refills the input queue from the corpus after it becomes
   812  // empty.
   813  func (c *coordinator) refillInputQueue() {
   814  	for _, e := range c.corpus.entries {
   815  		c.inputQueue.enqueue(e)
   816  	}
   817  }
   818  
   819  // queueForMinimization creates a fuzzMinimizeInput from result and adds it
   820  // to the minimization queue to be sent to workers.
   821  func (c *coordinator) queueForMinimization(result fuzzResult, keepCoverage []byte) {
   822  	if shouldPrintDebugInfo() {
   823  		c.debugLogf(
   824  			"queueing input for minimization, id: %s, parent: %s, keepCoverage: %t, crasher: %t",
   825  			result.entry.Path,
   826  			result.entry.Parent,
   827  			keepCoverage != nil,
   828  			result.crasherMsg != "",
   829  		)
   830  	}
   831  	if result.crasherMsg != "" {
   832  		c.minimizeQueue.clear()
   833  	}
   834  
   835  	input := fuzzMinimizeInput{
   836  		entry:        result.entry,
   837  		crasherMsg:   result.crasherMsg,
   838  		keepCoverage: keepCoverage,
   839  	}
   840  	c.minimizeQueue.enqueue(input)
   841  }
   842  
   843  // peekMinimizeInput returns the next input that should be sent to workers for
   844  // minimization.
   845  func (c *coordinator) peekMinimizeInput() (fuzzMinimizeInput, bool) {
   846  	if !c.canMinimize() {
   847  		// Already making the maximum number of calls to the fuzz function.
   848  		// Don't send more inputs right now.
   849  		return fuzzMinimizeInput{}, false
   850  	}
   851  	v, ok := c.minimizeQueue.peek()
   852  	if !ok {
   853  		return fuzzMinimizeInput{}, false
   854  	}
   855  	input := v.(fuzzMinimizeInput)
   856  
   857  	if c.opts.MinimizeTimeout > 0 {
   858  		input.timeout = c.opts.MinimizeTimeout
   859  	}
   860  	if c.opts.MinimizeLimit > 0 {
   861  		input.limit = c.opts.MinimizeLimit
   862  	} else if c.opts.Limit > 0 {
   863  		if input.crasherMsg != "" {
   864  			input.limit = c.opts.Limit
   865  		} else {
   866  			input.limit = c.opts.Limit / int64(c.opts.Parallel)
   867  			if c.opts.Limit%int64(c.opts.Parallel) > 0 {
   868  				input.limit++
   869  			}
   870  		}
   871  	}
   872  	if c.opts.Limit > 0 {
   873  		remaining := c.opts.Limit - c.count - c.countWaiting
   874  		if input.limit > remaining {
   875  			input.limit = remaining
   876  		}
   877  	}
   878  	return input, true
   879  }
   880  
   881  // sentMinimizeInput removes an input from the minimization queue after it's
   882  // sent to minimizeC.
   883  func (c *coordinator) sentMinimizeInput(input fuzzMinimizeInput) {
   884  	c.minimizeQueue.dequeue()
   885  	c.countWaiting += input.limit
   886  }
   887  
   888  // warmupRun returns true while the coordinator is running inputs without
   889  // mutating them as a warmup before fuzzing. This could be to gather baseline
   890  // coverage data for entries in the corpus, or to test all of the seed corpus
   891  // for errors before fuzzing begins.
   892  //
   893  // The coordinator doesn't store coverage data in the cache with each input
   894  // because that data would be invalid when counter offsets in the test binary
   895  // change.
   896  //
   897  // When gathering coverage, the coordinator sends each entry to a worker to
   898  // gather coverage for that entry only, without fuzzing or minimizing. This
   899  // phase ends when all workers have finished, and the coordinator has a combined
   900  // coverage map.
   901  func (c *coordinator) warmupRun() bool {
   902  	return c.warmupInputLeft > 0
   903  }
   904  
   905  // updateCoverage sets bits in c.coverageMask that are set in newCoverage.
   906  // updateCoverage returns the number of newly set bits. See the comment on
   907  // coverageMask for the format.
   908  func (c *coordinator) updateCoverage(newCoverage []byte) int {
   909  	if len(newCoverage) != len(c.coverageMask) {
   910  		panic(fmt.Sprintf("number of coverage counters changed at runtime: %d, expected %d", len(newCoverage), len(c.coverageMask)))
   911  	}
   912  	newBitCount := 0
   913  	for i := range newCoverage {
   914  		diff := newCoverage[i] &^ c.coverageMask[i]
   915  		newBitCount += bits.OnesCount8(diff)
   916  		c.coverageMask[i] |= newCoverage[i]
   917  	}
   918  	return newBitCount
   919  }
   920  
   921  // canMinimize returns whether the coordinator should attempt to find smaller
   922  // inputs that reproduce a crash or new coverage.
   923  func (c *coordinator) canMinimize() bool {
   924  	return c.minimizationAllowed &&
   925  		(c.opts.Limit == 0 || c.count+c.countWaiting < c.opts.Limit)
   926  }
   927  
   928  func (c *coordinator) elapsed() time.Duration {
   929  	return time.Since(c.startTime).Round(1 * time.Second)
   930  }
   931  
   932  // readCache creates a combined corpus from seed values and values in the cache
   933  // (in GOCACHE/fuzz).
   934  //
   935  // TODO(fuzzing): need a mechanism that can remove values that
   936  // aren't useful anymore, for example, because they have the wrong type.
   937  func (c *coordinator) readCache() error {
   938  	if _, err := c.addCorpusEntries(false, c.opts.Seed...); err != nil {
   939  		return err
   940  	}
   941  	entries, err := ReadCorpus(c.opts.CacheDir, c.opts.Types)
   942  	if err != nil {
   943  		if _, ok := err.(*MalformedCorpusError); !ok {
   944  			// It's okay if some files in the cache directory are malformed and
   945  			// are not included in the corpus, but fail if it's an I/O error.
   946  			return err
   947  		}
   948  		// TODO(jayconrod,katiehockman): consider printing some kind of warning
   949  		// indicating the number of files which were skipped because they are
   950  		// malformed.
   951  	}
   952  	if _, err := c.addCorpusEntries(false, entries...); err != nil {
   953  		return err
   954  	}
   955  	return nil
   956  }
   957  
   958  // MalformedCorpusError is an error found while reading the corpus from the
   959  // filesystem. All of the errors are stored in the errs list. The testing
   960  // framework uses this to report malformed files in testdata.
   961  type MalformedCorpusError struct {
   962  	errs []error
   963  }
   964  
   965  func (e *MalformedCorpusError) Error() string {
   966  	var msgs []string
   967  	for _, s := range e.errs {
   968  		msgs = append(msgs, s.Error())
   969  	}
   970  	return strings.Join(msgs, "\n")
   971  }
   972  
   973  // ReadCorpus reads the corpus from the provided dir. The returned corpus
   974  // entries are guaranteed to match the given types. Any malformed files will
   975  // be saved in a MalformedCorpusError and returned, along with the most recent
   976  // error.
   977  func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) {
   978  	files, err := os.ReadDir(dir)
   979  	if os.IsNotExist(err) {
   980  		return nil, nil // No corpus to read
   981  	} else if err != nil {
   982  		return nil, fmt.Errorf("reading seed corpus from testdata: %v", err)
   983  	}
   984  	var corpus []CorpusEntry
   985  	var errs []error
   986  	for _, file := range files {
   987  		// TODO(jayconrod,katiehockman): determine when a file is a fuzzing input
   988  		// based on its name. We should only read files created by writeToCorpus.
   989  		// If we read ALL files, we won't be able to change the file format by
   990  		// changing the extension. We also won't be able to add files like
   991  		// README.txt explaining why the directory exists.
   992  		if file.IsDir() {
   993  			continue
   994  		}
   995  		filename := filepath.Join(dir, file.Name())
   996  		data, err := os.ReadFile(filename)
   997  		if err != nil {
   998  			return nil, fmt.Errorf("failed to read corpus file: %v", err)
   999  		}
  1000  		var vals []any
  1001  		vals, err = readCorpusData(data, types)
  1002  		if err != nil {
  1003  			errs = append(errs, fmt.Errorf("%q: %v", filename, err))
  1004  			continue
  1005  		}
  1006  		corpus = append(corpus, CorpusEntry{Path: filename, Values: vals})
  1007  	}
  1008  	if len(errs) > 0 {
  1009  		return corpus, &MalformedCorpusError{errs: errs}
  1010  	}
  1011  	return corpus, nil
  1012  }
  1013  
  1014  func readCorpusData(data []byte, types []reflect.Type) ([]any, error) {
  1015  	vals, err := unmarshalCorpusFile(data)
  1016  	if err != nil {
  1017  		return nil, fmt.Errorf("unmarshal: %v", err)
  1018  	}
  1019  	if err = CheckCorpus(vals, types); err != nil {
  1020  		return nil, err
  1021  	}
  1022  	return vals, nil
  1023  }
  1024  
  1025  // CheckCorpus verifies that the types in vals match the expected types
  1026  // provided.
  1027  func CheckCorpus(vals []any, types []reflect.Type) error {
  1028  	if len(vals) != len(types) {
  1029  		return fmt.Errorf("wrong number of values in corpus entry: %d, want %d", len(vals), len(types))
  1030  	}
  1031  	valsT := make([]reflect.Type, len(vals))
  1032  	for valsI, v := range vals {
  1033  		valsT[valsI] = reflect.TypeOf(v)
  1034  	}
  1035  	for i := range types {
  1036  		if valsT[i] != types[i] {
  1037  			return fmt.Errorf("mismatched types in corpus entry: %v, want %v", valsT, types)
  1038  		}
  1039  	}
  1040  	return nil
  1041  }
  1042  
  1043  // writeToCorpus atomically writes the given bytes to a new file in testdata. If
  1044  // the directory does not exist, it will create one. If the file already exists,
  1045  // writeToCorpus will not rewrite it. writeToCorpus sets entry.Path to the new
  1046  // file that was just written or an error if it failed.
  1047  func writeToCorpus(entry *CorpusEntry, dir string) (err error) {
  1048  	sum := fmt.Sprintf("%x", sha256.Sum256(entry.Data))[:16]
  1049  	entry.Path = filepath.Join(dir, sum)
  1050  	if err := os.MkdirAll(dir, 0777); err != nil {
  1051  		return err
  1052  	}
  1053  	if err := os.WriteFile(entry.Path, entry.Data, 0666); err != nil {
  1054  		os.Remove(entry.Path) // remove partially written file
  1055  		return err
  1056  	}
  1057  	return nil
  1058  }
  1059  
  1060  func testName(path string) string {
  1061  	return filepath.Base(path)
  1062  }
  1063  
  1064  func zeroValue(t reflect.Type) any {
  1065  	for _, v := range zeroVals {
  1066  		if reflect.TypeOf(v) == t {
  1067  			return v
  1068  		}
  1069  	}
  1070  	panic(fmt.Sprintf("unsupported type: %v", t))
  1071  }
  1072  
  1073  var zeroVals []any = []any{
  1074  	[]byte(""),
  1075  	string(""),
  1076  	false,
  1077  	byte(0),
  1078  	rune(0),
  1079  	float32(0),
  1080  	float64(0),
  1081  	int(0),
  1082  	int8(0),
  1083  	int16(0),
  1084  	int32(0),
  1085  	int64(0),
  1086  	uint(0),
  1087  	uint8(0),
  1088  	uint16(0),
  1089  	uint32(0),
  1090  	uint64(0),
  1091  }
  1092  
// debugInfo records whether the "#fuzzdebug" godebug setting has the value
// "1". It is computed once, when the package's variables are initialized, so
// later changes to the setting are not observed through it.
// NOTE(review): the leading "#" presumably marks the setting as an
// undocumented one for internal/godebug — confirm against that package.
var debugInfo = godebug.New("#fuzzdebug").Value() == "1"

// shouldPrintDebugInfo reports whether debug logging is enabled, that is,
// the value of debugInfo.
func shouldPrintDebugInfo() bool {
	return debugInfo
}
  1098  
  1099  func (c *coordinator) debugLogf(format string, args ...any) {
  1100  	t := time.Now().Format("2006-01-02 15:04:05.999999999")
  1101  	fmt.Fprintf(c.opts.Log, t+" DEBUG "+format+"\n", args...)
  1102  }
  1103  

View as plain text