1 // Copyright 2023 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package bisect can be used by compilers and other programs 6 // to serve as a target for the bisect debugging tool. 7 // See [golang.org/x/tools/cmd/bisect] for details about using the tool. 8 // 9 // To be a bisect target, allowing bisect to help determine which of a set of independent 10 // changes provokes a failure, a program needs to: 11 // 12 // 1. Define a way to accept a change pattern on its command line or in its environment. 13 // The most common mechanism is a command-line flag. 14 // The pattern can be passed to [New] to create a [Matcher], the compiled form of a pattern. 15 // 16 // 2. Assign each change a unique ID. One possibility is to use a sequence number, 17 // but the most common mechanism is to hash some kind of identifying information 18 // like the file and line number where the change might be applied. 19 // [Hash] hashes its arguments to compute an ID. 20 // 21 // 3. Enable each change that the pattern says should be enabled. 22 // The [Matcher.Enable] method answers this question for a given change ID. 23 // 24 // 4. Report each change that the pattern says should be reported. 25 // The [Matcher.Report] method answers this question for a given change ID. 26 // The report consists of one more lines on standard error or standard output 27 // that contain a “match marker”. [Marker] returns the match marker for a given ID. 28 // When bisect reports a change as causing the failure, it identifies the change 29 // by printing those report lines, with the match marker removed. 30 // 31 // # Example Usage 32 // 33 // A program starts by defining how it receives the pattern. In this example, we will assume a flag. 34 // The next step is to compile the pattern: 35 // 36 // m, err := bisect.New(patternFlag) 37 // if err != nil { 38 // log.Fatal(err) 39 // } 40 // 41 // Then, each time a potential change is considered, the program computes 42 // a change ID by hashing identifying information (source file and line, in this case) 43 // and then calls m.ShouldEnable and m.ShouldReport to decide whether to 44 // enable and report the change, respectively: 45 // 46 // for each change { 47 // h := bisect.Hash(file, line) 48 // if m.ShouldEnable(h) { 49 // enableChange() 50 // } 51 // if m.ShouldReport(h) { 52 // log.Printf("%v %s:%d", bisect.Marker(h), file, line) 53 // } 54 // } 55 // 56 // Note that the two return different values when bisect is searching for a 57 // minimal set of changes to disable to provoke a failure. 58 // 59 // Finally, note that New returns a nil Matcher when there is no pattern, 60 // meaning that the target is not running under bisect at all. 61 // In that common case, the computation of the hash can be avoided entirely 62 // by checking for m == nil first: 63 // 64 // for each change { 65 // if m == nil { 66 // enableChange() 67 // } else { 68 // h := bisect.Hash(file, line) 69 // if m.ShouldEnable(h) { 70 // enableChange() 71 // } 72 // if m.ShouldReport(h) { 73 // log.Printf("%v %s:%d", bisect.Marker(h), file, line) 74 // } 75 // } 76 // } 77 // 78 // # Pattern Syntax 79 // 80 // Patterns are generated by the bisect tool and interpreted by [New]. 81 // Users should not have to understand the patterns except when 82 // debugging a target's bisect support or debugging the bisect tool itself. 83 // 84 // The pattern syntax selecting a change is a sequence of bit strings 85 // separated by + and - operators. Each bit string denotes the set of 86 // changes with IDs ending in those bits, + is set addition, - is set subtraction, 87 // and the expression is evaluated in the usual left-to-right order. 88 // The special binary number “y” denotes the set of all changes, 89 // standing in for the empty bit string. 90 // In the expression, all the + operators must appear before all the - operators. 91 // A leading + adds to an empty set. A leading - subtracts from the set of all 92 // possible suffixes. 93 // 94 // For example: 95 // 96 // - “01+10” and “+01+10” both denote the set of changes 97 // with IDs ending with the bits 01 or 10. 98 // 99 // - “01+10-1001” denotes the set of changes with IDs 100 // ending with the bits 01 or 10, but excluding those ending in 1001. 101 // 102 // - “-01-1000” and “y-01-1000 both denote the set of all changes 103 // with IDs not ending in 01 nor 1000. 104 // 105 // - “0+1-01+001” is not a valid pattern, because all the + operators do not 106 // appear before all the - operators. 107 // 108 // In the syntaxes described so far, the pattern specifies the changes to 109 // enable and report. If a pattern is prefixed by a “!”, the meaning 110 // changes: the pattern specifies the changes to DISABLE and report. This 111 // mode of operation is needed when a program passes with all changes 112 // enabled but fails with no changes enabled. In this case, bisect 113 // searches for minimal sets of changes to disable. 114 // Put another way, the leading “!” inverts the result from [Matcher.ShouldEnable] 115 // but does not invert the result from [Matcher.ShouldReport]. 116 // 117 // As a convenience for manual debugging, “n” is an alias for “!y”, 118 // meaning to disable and report all changes. 119 // 120 // Finally, a leading “v” in the pattern indicates that the reports will be shown 121 // to the user of bisect to describe the changes involved in a failure. 122 // At the API level, the leading “v” causes [Matcher.Visible] to return true. 123 // See the next section for details. 124 // 125 // # Match Reports 126 // 127 // The target program must enable only those changed matched 128 // by the pattern, and it must print a match report for each such change. 129 // A match report consists of one or more lines of text that will be 130 // printed by the bisect tool to describe a change implicated in causing 131 // a failure. Each line in the report for a given change must contain a 132 // match marker with that change ID, as returned by [Marker]. 133 // The markers are elided when displaying the lines to the user. 134 // 135 // A match marker has the form “[bisect-match 0x1234]” where 136 // 0x1234 is the change ID in hexadecimal. 137 // An alternate form is “[bisect-match 010101]”, giving the change ID in binary. 138 // 139 // When [Matcher.Visible] returns false, the match reports are only 140 // being processed by bisect to learn the set of enabled changes, 141 // not shown to the user, meaning that each report can be a match 142 // marker on a line by itself, eliding the usual textual description. 143 // When the textual description is expensive to compute, 144 // checking [Matcher.Visible] can help the avoid that expense 145 // in most runs. 146 package bisect 147 148 // New creates and returns a new Matcher implementing the given pattern. 149 // The pattern syntax is defined in the package doc comment. 150 // 151 // In addition to the pattern syntax syntax, New("") returns nil, nil. 152 // The nil *Matcher is valid for use: it returns true from ShouldEnable 153 // and false from ShouldReport for all changes. Callers can avoid calling 154 // [Hash], [Matcher.ShouldEnable], and [Matcher.ShouldPrint] entirely 155 // when they recognize the nil Matcher. 156 func New(pattern string) (*Matcher, error) { 157 if pattern == "" { 158 return nil, nil 159 } 160 161 m := new(Matcher) 162 163 // Allow multiple v, so that “bisect cmd vPATTERN” can force verbose all the time. 164 p := pattern 165 for len(p) > 0 && p[0] == 'v' { 166 m.verbose = true 167 p = p[1:] 168 if p == "" { 169 return nil, &parseError{"invalid pattern syntax: " + pattern} 170 } 171 } 172 173 // Allow multiple !, each negating the last, so that “bisect cmd !PATTERN” works 174 // even when bisect chooses to add its own !. 175 m.enable = true 176 for len(p) > 0 && p[0] == '!' { 177 m.enable = !m.enable 178 p = p[1:] 179 if p == "" { 180 return nil, &parseError{"invalid pattern syntax: " + pattern} 181 } 182 } 183 184 if p == "n" { 185 // n is an alias for !y. 186 m.enable = !m.enable 187 p = "y" 188 } 189 190 // Parse actual pattern syntax. 191 result := true 192 bits := uint64(0) 193 start := 0 194 wid := 1 // 1-bit (binary); sometimes 4-bit (hex) 195 for i := 0; i <= len(p); i++ { 196 // Imagine a trailing - at the end of the pattern to flush final suffix 197 c := byte('-') 198 if i < len(p) { 199 c = p[i] 200 } 201 if i == start && wid == 1 && c == 'x' { // leading x for hex 202 start = i + 1 203 wid = 4 204 continue 205 } 206 switch c { 207 default: 208 return nil, &parseError{"invalid pattern syntax: " + pattern} 209 case '2', '3', '4', '5', '6', '7', '8', '9': 210 if wid != 4 { 211 return nil, &parseError{"invalid pattern syntax: " + pattern} 212 } 213 fallthrough 214 case '0', '1': 215 bits <<= wid 216 bits |= uint64(c - '0') 217 case 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F': 218 if wid != 4 { 219 return nil, &parseError{"invalid pattern syntax: " + pattern} 220 } 221 bits <<= 4 222 bits |= uint64(c&^0x20 - 'A' + 10) 223 case 'y': 224 if i+1 < len(p) && (p[i+1] == '0' || p[i+1] == '1') { 225 return nil, &parseError{"invalid pattern syntax: " + pattern} 226 } 227 bits = 0 228 case '+', '-': 229 if c == '+' && result == false { 230 // Have already seen a -. Should be - from here on. 231 return nil, &parseError{"invalid pattern syntax (+ after -): " + pattern} 232 } 233 if i > 0 { 234 n := (i - start) * wid 235 if n > 64 { 236 return nil, &parseError{"pattern bits too long: " + pattern} 237 } 238 if n <= 0 { 239 return nil, &parseError{"invalid pattern syntax: " + pattern} 240 } 241 if p[start] == 'y' { 242 n = 0 243 } 244 mask := uint64(1)<<n - 1 245 m.list = append(m.list, cond{mask, bits, result}) 246 } else if c == '-' { 247 // leading - subtracts from complete set 248 m.list = append(m.list, cond{0, 0, true}) 249 } 250 bits = 0 251 result = c == '+' 252 start = i + 1 253 wid = 1 254 } 255 } 256 return m, nil 257 } 258 259 // A Matcher is the parsed, compiled form of a PATTERN string. 260 // The nil *Matcher is valid: it has all changes enabled but none reported. 261 type Matcher struct { 262 verbose bool 263 enable bool // when true, list is for “enable and report” (when false, “disable and report”) 264 list []cond // conditions; later ones win over earlier ones 265 } 266 267 // A cond is a single condition in the matcher. 268 // Given an input id, if id&mask == bits, return the result. 269 type cond struct { 270 mask uint64 271 bits uint64 272 result bool 273 } 274 275 // Verbose reports whether the reports will be shown to users 276 // and need to include a human-readable change description. 277 // If not, the target can print just the Marker on a line by itself 278 // and perhaps save some computation. 279 func (m *Matcher) Verbose() bool { 280 return m.verbose 281 } 282 283 // ShouldEnable reports whether the change with the given id should be enabled. 284 func (m *Matcher) ShouldEnable(id uint64) bool { 285 if m == nil { 286 return true 287 } 288 for i := len(m.list) - 1; i >= 0; i-- { 289 c := &m.list[i] 290 if id&c.mask == c.bits { 291 return c.result == m.enable 292 } 293 } 294 return false == m.enable 295 } 296 297 // ShouldReport reports whether the change with the given id should be reported. 298 func (m *Matcher) ShouldReport(id uint64) bool { 299 if m == nil { 300 return false 301 } 302 for i := len(m.list) - 1; i >= 0; i-- { 303 c := &m.list[i] 304 if id&c.mask == c.bits { 305 return c.result 306 } 307 } 308 return false 309 } 310 311 // Marker returns the match marker text to use on any line reporting details 312 // about a match of the given ID. 313 // It always returns the hexadecimal format. 314 func Marker(id uint64) string { 315 return string(AppendMarker(nil, id)) 316 } 317 318 // AppendMarker is like [Marker] but appends the marker to dst. 319 func AppendMarker(dst []byte, id uint64) []byte { 320 const prefix = "[bisect-match 0x" 321 var buf [len(prefix) + 16 + 1]byte 322 copy(buf[:], prefix) 323 for i := 0; i < 16; i++ { 324 buf[len(prefix)+i] = "0123456789abcdef"[id>>60] 325 id <<= 4 326 } 327 buf[len(prefix)+16] = ']' 328 return append(dst, buf[:]...) 329 } 330 331 // CutMarker finds the first match marker in line and removes it, 332 // returning the shortened line (with the marker removed), 333 // the ID from the match marker, 334 // and whether a marker was found at all. 335 // If there is no marker, CutMarker returns line, 0, false. 336 func CutMarker(line string) (short string, id uint64, ok bool) { 337 // Find first instance of prefix. 338 prefix := "[bisect-match " 339 i := 0 340 for ; ; i++ { 341 if i >= len(line)-len(prefix) { 342 return line, 0, false 343 } 344 if line[i] == '[' && line[i:i+len(prefix)] == prefix { 345 break 346 } 347 } 348 349 // Scan to ]. 350 j := i + len(prefix) 351 for j < len(line) && line[j] != ']' { 352 j++ 353 } 354 if j >= len(line) { 355 return line, 0, false 356 } 357 358 // Parse id. 359 idstr := line[i+len(prefix) : j] 360 if len(idstr) >= 3 && idstr[:2] == "0x" { 361 // parse hex 362 if len(idstr) > 2+16 { // max 0x + 16 digits 363 return line, 0, false 364 } 365 for i := 2; i < len(idstr); i++ { 366 id <<= 4 367 switch c := idstr[i]; { 368 case '0' <= c && c <= '9': 369 id |= uint64(c - '0') 370 case 'a' <= c && c <= 'f': 371 id |= uint64(c - 'a' + 10) 372 case 'A' <= c && c <= 'F': 373 id |= uint64(c - 'A' + 10) 374 } 375 } 376 } else { 377 if idstr == "" || len(idstr) > 64 { // min 1 digit, max 64 digits 378 return line, 0, false 379 } 380 // parse binary 381 for i := 0; i < len(idstr); i++ { 382 id <<= 1 383 switch c := idstr[i]; c { 384 default: 385 return line, 0, false 386 case '0', '1': 387 id |= uint64(c - '0') 388 } 389 } 390 } 391 392 // Construct shortened line. 393 // Remove at most one space from around the marker, 394 // so that "foo [marker] bar" shortens to "foo bar". 395 j++ // skip ] 396 if i > 0 && line[i-1] == ' ' { 397 i-- 398 } else if j < len(line) && line[j] == ' ' { 399 j++ 400 } 401 short = line[:i] + line[j:] 402 return short, id, true 403 } 404 405 // Hash computes a hash of the data arguments, 406 // each of which must be of type string, byte, int, uint, int32, uint32, int64, uint64, uintptr, or a slice of one of those types. 407 func Hash(data ...any) uint64 { 408 h := offset64 409 for _, v := range data { 410 switch v := v.(type) { 411 default: 412 // Note: Not printing the type, because reflect.ValueOf(v) 413 // would make the interfaces prepared by the caller escape 414 // and therefore allocate. This way, Hash(file, line) runs 415 // without any allocation. It should be clear from the 416 // source code calling Hash what the bad argument was. 417 panic("bisect.Hash: unexpected argument type") 418 case string: 419 h = fnvString(h, v) 420 case byte: 421 h = fnv(h, v) 422 case int: 423 h = fnvUint64(h, uint64(v)) 424 case uint: 425 h = fnvUint64(h, uint64(v)) 426 case int32: 427 h = fnvUint32(h, uint32(v)) 428 case uint32: 429 h = fnvUint32(h, v) 430 case int64: 431 h = fnvUint64(h, uint64(v)) 432 case uint64: 433 h = fnvUint64(h, v) 434 case uintptr: 435 h = fnvUint64(h, uint64(v)) 436 case []string: 437 for _, x := range v { 438 h = fnvString(h, x) 439 } 440 case []byte: 441 for _, x := range v { 442 h = fnv(h, x) 443 } 444 case []int: 445 for _, x := range v { 446 h = fnvUint64(h, uint64(x)) 447 } 448 case []uint: 449 for _, x := range v { 450 h = fnvUint64(h, uint64(x)) 451 } 452 case []int32: 453 for _, x := range v { 454 h = fnvUint32(h, uint32(x)) 455 } 456 case []uint32: 457 for _, x := range v { 458 h = fnvUint32(h, x) 459 } 460 case []int64: 461 for _, x := range v { 462 h = fnvUint64(h, uint64(x)) 463 } 464 case []uint64: 465 for _, x := range v { 466 h = fnvUint64(h, x) 467 } 468 case []uintptr: 469 for _, x := range v { 470 h = fnvUint64(h, uint64(x)) 471 } 472 } 473 } 474 return h 475 } 476 477 // Trivial error implementation, here to avoid importing errors. 478 479 type parseError struct{ text string } 480 481 func (e *parseError) Error() string { return e.text } 482 483 // FNV-1a implementation. See Go's hash/fnv/fnv.go. 484 // Copied here for simplicity (can handle uints directly) 485 // and to avoid the dependency. 486 487 const ( 488 offset64 uint64 = 14695981039346656037 489 prime64 uint64 = 1099511628211 490 ) 491 492 func fnv(h uint64, x byte) uint64 { 493 h ^= uint64(x) 494 h *= prime64 495 return h 496 } 497 498 func fnvString(h uint64, x string) uint64 { 499 for i := 0; i < len(x); i++ { 500 h ^= uint64(x[i]) 501 h *= prime64 502 } 503 return h 504 } 505 506 func fnvUint64(h uint64, x uint64) uint64 { 507 for i := 0; i < 8; i++ { 508 h ^= uint64(x & 0xFF) 509 x >>= 8 510 h *= prime64 511 } 512 return h 513 } 514 515 func fnvUint32(h uint64, x uint32) uint64 { 516 for i := 0; i < 4; i++ { 517 h ^= uint64(x & 0xFF) 518 x >>= 8 519 h *= prime64 520 } 521 return h 522 } 523