...

Source file src/cmd/compile/internal/ssa/prove.go

Documentation: cmd/compile/internal/ssa

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/internal/src"
     9  	"fmt"
    10  	"math"
    11  )
    12  
    13  type branch int
    14  
    15  const (
    16  	unknown branch = iota
    17  	positive
    18  	negative
    19  	// The outedges from a jump table are jumpTable0,
    20  	// jumpTable0+1, jumpTable0+2, etc. There could be an
    21  	// arbitrary number so we can't list them all here.
    22  	jumpTable0
    23  )
    24  
    25  // relation represents the set of possible relations between
    26  // pairs of variables (v, w). Without a priori knowledge the
    27  // mask is lt | eq | gt meaning v can be less than, equal to or
    28  // greater than w. When the execution path branches on the condition
    29  // `v op w` the set of relations is updated to exclude any
    30  // relation not possible due to `v op w` being true (or false).
    31  //
    32  // E.g.
    33  //
    34  //	r := relation(...)
    35  //
    36  //	if v < w {
    37  //	  newR := r & lt
    38  //	}
    39  //	if v >= w {
    40  //	  newR := r & (eq|gt)
    41  //	}
    42  //	if v != w {
    43  //	  newR := r & (lt|gt)
    44  //	}
    45  type relation uint
    46  
    47  const (
    48  	lt relation = 1 << iota
    49  	eq
    50  	gt
    51  )
    52  
    53  var relationStrings = [...]string{
    54  	0: "none", lt: "<", eq: "==", lt | eq: "<=",
    55  	gt: ">", gt | lt: "!=", gt | eq: ">=", gt | eq | lt: "any",
    56  }
    57  
    58  func (r relation) String() string {
    59  	if r < relation(len(relationStrings)) {
    60  		return relationStrings[r]
    61  	}
    62  	return fmt.Sprintf("relation(%d)", uint(r))
    63  }
    64  
    65  // domain represents the domain of a variable pair in which a set
    66  // of relations is known. For example, relations learned for unsigned
    67  // pairs cannot be transferred to signed pairs because the same bit
    68  // representation can mean something else.
    69  type domain uint
    70  
    71  const (
    72  	signed domain = 1 << iota
    73  	unsigned
    74  	pointer
    75  	boolean
    76  )
    77  
    78  var domainStrings = [...]string{
    79  	"signed", "unsigned", "pointer", "boolean",
    80  }
    81  
    82  func (d domain) String() string {
    83  	s := ""
    84  	for i, ds := range domainStrings {
    85  		if d&(1<<uint(i)) != 0 {
    86  			if len(s) != 0 {
    87  				s += "|"
    88  			}
    89  			s += ds
    90  			d &^= 1 << uint(i)
    91  		}
    92  	}
    93  	if d != 0 {
    94  		if len(s) != 0 {
    95  			s += "|"
    96  		}
    97  		s += fmt.Sprintf("0x%x", uint(d))
    98  	}
    99  	return s
   100  }
   101  
   102  type pair struct {
   103  	// a pair of values, ordered by ID.
   104  	// v can be nil, to mean the zero value.
   105  	// for booleans the zero value (v == nil) is false.
   106  	v, w *Value
   107  	d    domain
   108  }
   109  
   110  // fact is a pair plus a relation for that pair.
   111  type fact struct {
   112  	p pair
   113  	r relation
   114  }
   115  
   116  // a limit records known upper and lower bounds for a value.
   117  type limit struct {
   118  	min, max   int64  // min <= value <= max, signed
   119  	umin, umax uint64 // umin <= value <= umax, unsigned
   120  }
   121  
   122  func (l limit) String() string {
   123  	return fmt.Sprintf("sm,SM,um,UM=%d,%d,%d,%d", l.min, l.max, l.umin, l.umax)
   124  }
   125  
   126  func (l limit) intersect(l2 limit) limit {
   127  	if l.min < l2.min {
   128  		l.min = l2.min
   129  	}
   130  	if l.umin < l2.umin {
   131  		l.umin = l2.umin
   132  	}
   133  	if l.max > l2.max {
   134  		l.max = l2.max
   135  	}
   136  	if l.umax > l2.umax {
   137  		l.umax = l2.umax
   138  	}
   139  	return l
   140  }
   141  
   142  var noLimit = limit{math.MinInt64, math.MaxInt64, 0, math.MaxUint64}
   143  
   144  // a limitFact is a limit known for a particular value.
   145  type limitFact struct {
   146  	vid   ID
   147  	limit limit
   148  }
   149  
   150  // factsTable keeps track of relations between pairs of values.
   151  //
   152  // The fact table logic is sound, but incomplete. Outside of a few
   153  // special cases, it performs no deduction or arithmetic. While there
   154  // are known decision procedures for this, the ad hoc approach taken
   155  // by the facts table is effective for real code while remaining very
   156  // efficient.
   157  type factsTable struct {
   158  	// unsat is true if facts contains a contradiction.
   159  	//
   160  	// Note that the factsTable logic is incomplete, so if unsat
   161  	// is false, the assertions in factsTable could be satisfiable
   162  	// *or* unsatisfiable.
   163  	unsat      bool // true if facts contains a contradiction
   164  	unsatDepth int  // number of unsat checkpoints
   165  
   166  	facts map[pair]relation // current known set of relation
   167  	stack []fact            // previous sets of relations
   168  
   169  	// order* is a couple of partial order sets that record information
   170  	// about relations between SSA values in the signed and unsigned
   171  	// domain.
   172  	orderS *poset
   173  	orderU *poset
   174  
   175  	// known lower and upper bounds on individual values.
   176  	limits     map[ID]limit
   177  	limitStack []limitFact // previous entries
   178  
   179  	// For each slice s, a map from s to a len(s)/cap(s) value (if any)
   180  	// TODO: check if there are cases that matter where we have
   181  	// more than one len(s) for a slice. We could keep a list if necessary.
   182  	lens map[ID]*Value
   183  	caps map[ID]*Value
   184  
   185  	// zero is a zero-valued constant
   186  	zero *Value
   187  }
   188  
   189  // checkpointFact is an invalid value used for checkpointing
   190  // and restoring factsTable.
   191  var checkpointFact = fact{}
   192  var checkpointBound = limitFact{}
   193  
   194  func newFactsTable(f *Func) *factsTable {
   195  	ft := &factsTable{}
   196  	ft.orderS = f.newPoset()
   197  	ft.orderU = f.newPoset()
   198  	ft.orderS.SetUnsigned(false)
   199  	ft.orderU.SetUnsigned(true)
   200  	ft.facts = make(map[pair]relation)
   201  	ft.stack = make([]fact, 4)
   202  	ft.limits = make(map[ID]limit)
   203  	ft.limitStack = make([]limitFact, 4)
   204  	ft.zero = f.ConstInt64(f.Config.Types.Int64, 0)
   205  	return ft
   206  }
   207  
   208  // update updates the set of relations between v and w in domain d
   209  // restricting it to r.
   210  func (ft *factsTable) update(parent *Block, v, w *Value, d domain, r relation) {
   211  	if parent.Func.pass.debug > 2 {
   212  		parent.Func.Warnl(parent.Pos, "parent=%s, update %s %s %s", parent, v, w, r)
   213  	}
   214  	// No need to do anything else if we already found unsat.
   215  	if ft.unsat {
   216  		return
   217  	}
   218  
   219  	// Self-fact. It's wasteful to register it into the facts
   220  	// table, so just note whether it's satisfiable
   221  	if v == w {
   222  		if r&eq == 0 {
   223  			ft.unsat = true
   224  		}
   225  		return
   226  	}
   227  
   228  	if d == signed || d == unsigned {
   229  		var ok bool
   230  		order := ft.orderS
   231  		if d == unsigned {
   232  			order = ft.orderU
   233  		}
   234  		switch r {
   235  		case lt:
   236  			ok = order.SetOrder(v, w)
   237  		case gt:
   238  			ok = order.SetOrder(w, v)
   239  		case lt | eq:
   240  			ok = order.SetOrderOrEqual(v, w)
   241  		case gt | eq:
   242  			ok = order.SetOrderOrEqual(w, v)
   243  		case eq:
   244  			ok = order.SetEqual(v, w)
   245  		case lt | gt:
   246  			ok = order.SetNonEqual(v, w)
   247  		default:
   248  			panic("unknown relation")
   249  		}
   250  		if !ok {
   251  			if parent.Func.pass.debug > 2 {
   252  				parent.Func.Warnl(parent.Pos, "unsat %s %s %s", v, w, r)
   253  			}
   254  			ft.unsat = true
   255  			return
   256  		}
   257  	} else {
   258  		if lessByID(w, v) {
   259  			v, w = w, v
   260  			r = reverseBits[r]
   261  		}
   262  
   263  		p := pair{v, w, d}
   264  		oldR, ok := ft.facts[p]
   265  		if !ok {
   266  			if v == w {
   267  				oldR = eq
   268  			} else {
   269  				oldR = lt | eq | gt
   270  			}
   271  		}
   272  		// No changes compared to information already in facts table.
   273  		if oldR == r {
   274  			return
   275  		}
   276  		ft.stack = append(ft.stack, fact{p, oldR})
   277  		ft.facts[p] = oldR & r
   278  		// If this relation is not satisfiable, mark it and exit right away
   279  		if oldR&r == 0 {
   280  			if parent.Func.pass.debug > 2 {
   281  				parent.Func.Warnl(parent.Pos, "unsat %s %s %s", v, w, r)
   282  			}
   283  			ft.unsat = true
   284  			return
   285  		}
   286  	}
   287  
   288  	// Extract bounds when comparing against constants
   289  	if v.isGenericIntConst() {
   290  		v, w = w, v
   291  		r = reverseBits[r]
   292  	}
   293  	if v != nil && w.isGenericIntConst() {
   294  		// Note: all the +1/-1 below could overflow/underflow. Either will
   295  		// still generate correct results, it will just lead to imprecision.
   296  		// In fact if there is overflow/underflow, the corresponding
   297  		// code is unreachable because the known range is outside the range
   298  		// of the value's type.
   299  		old, ok := ft.limits[v.ID]
   300  		if !ok {
   301  			old = noLimit
   302  			if v.isGenericIntConst() {
   303  				switch d {
   304  				case signed:
   305  					old.min, old.max = v.AuxInt, v.AuxInt
   306  					if v.AuxInt >= 0 {
   307  						old.umin, old.umax = uint64(v.AuxInt), uint64(v.AuxInt)
   308  					}
   309  				case unsigned:
   310  					old.umin = v.AuxUnsigned()
   311  					old.umax = old.umin
   312  					if int64(old.umin) >= 0 {
   313  						old.min, old.max = int64(old.umin), int64(old.umin)
   314  					}
   315  				}
   316  			}
   317  		}
   318  		lim := noLimit
   319  		switch d {
   320  		case signed:
   321  			c := w.AuxInt
   322  			switch r {
   323  			case lt:
   324  				lim.max = c - 1
   325  			case lt | eq:
   326  				lim.max = c
   327  			case gt | eq:
   328  				lim.min = c
   329  			case gt:
   330  				lim.min = c + 1
   331  			case lt | gt:
   332  				lim = old
   333  				if c == lim.min {
   334  					lim.min++
   335  				}
   336  				if c == lim.max {
   337  					lim.max--
   338  				}
   339  			case eq:
   340  				lim.min = c
   341  				lim.max = c
   342  			}
   343  			if lim.min >= 0 {
   344  				// int(x) >= 0 && int(x) >= N  ⇒  uint(x) >= N
   345  				lim.umin = uint64(lim.min)
   346  			}
   347  			if lim.max != noLimit.max && old.min >= 0 && lim.max >= 0 {
   348  				// 0 <= int(x) <= N  ⇒  0 <= uint(x) <= N
   349  				// This is for a max update, so the lower bound
   350  				// comes from what we already know (old).
   351  				lim.umax = uint64(lim.max)
   352  			}
   353  		case unsigned:
   354  			uc := w.AuxUnsigned()
   355  			switch r {
   356  			case lt:
   357  				lim.umax = uc - 1
   358  			case lt | eq:
   359  				lim.umax = uc
   360  			case gt | eq:
   361  				lim.umin = uc
   362  			case gt:
   363  				lim.umin = uc + 1
   364  			case lt | gt:
   365  				lim = old
   366  				if uc == lim.umin {
   367  					lim.umin++
   368  				}
   369  				if uc == lim.umax {
   370  					lim.umax--
   371  				}
   372  			case eq:
   373  				lim.umin = uc
   374  				lim.umax = uc
   375  			}
   376  			// We could use the contrapositives of the
   377  			// signed implications to derive signed facts,
   378  			// but it turns out not to matter.
   379  		}
   380  		ft.limitStack = append(ft.limitStack, limitFact{v.ID, old})
   381  		lim = old.intersect(lim)
   382  		ft.limits[v.ID] = lim
   383  		if v.Block.Func.pass.debug > 2 {
   384  			v.Block.Func.Warnl(parent.Pos, "parent=%s, new limits %s %s %s %s", parent, v, w, r, lim.String())
   385  		}
   386  		if lim.min > lim.max || lim.umin > lim.umax {
   387  			ft.unsat = true
   388  			return
   389  		}
   390  	}
   391  
   392  	// Derived facts below here are only about numbers.
   393  	if d != signed && d != unsigned {
   394  		return
   395  	}
   396  
   397  	// Additional facts we know given the relationship between len and cap.
   398  	//
   399  	// TODO: Since prove now derives transitive relations, it
   400  	// should be sufficient to learn that len(w) <= cap(w) at the
   401  	// beginning of prove where we look for all len/cap ops.
   402  	if v.Op == OpSliceLen && r&lt == 0 && ft.caps[v.Args[0].ID] != nil {
   403  		// len(s) > w implies cap(s) > w
   404  		// len(s) >= w implies cap(s) >= w
   405  		// len(s) == w implies cap(s) >= w
   406  		ft.update(parent, ft.caps[v.Args[0].ID], w, d, r|gt)
   407  	}
   408  	if w.Op == OpSliceLen && r&gt == 0 && ft.caps[w.Args[0].ID] != nil {
   409  		// same, length on the RHS.
   410  		ft.update(parent, v, ft.caps[w.Args[0].ID], d, r|lt)
   411  	}
   412  	if v.Op == OpSliceCap && r&gt == 0 && ft.lens[v.Args[0].ID] != nil {
   413  		// cap(s) < w implies len(s) < w
   414  		// cap(s) <= w implies len(s) <= w
   415  		// cap(s) == w implies len(s) <= w
   416  		ft.update(parent, ft.lens[v.Args[0].ID], w, d, r|lt)
   417  	}
   418  	if w.Op == OpSliceCap && r&lt == 0 && ft.lens[w.Args[0].ID] != nil {
   419  		// same, capacity on the RHS.
   420  		ft.update(parent, v, ft.lens[w.Args[0].ID], d, r|gt)
   421  	}
   422  
   423  	// Process fence-post implications.
   424  	//
   425  	// First, make the condition > or >=.
   426  	if r == lt || r == lt|eq {
   427  		v, w = w, v
   428  		r = reverseBits[r]
   429  	}
   430  	switch r {
   431  	case gt:
   432  		if x, delta := isConstDelta(v); x != nil && delta == 1 {
   433  			// x+1 > w  ⇒  x >= w
   434  			//
   435  			// This is useful for eliminating the
   436  			// growslice branch of append.
   437  			ft.update(parent, x, w, d, gt|eq)
   438  		} else if x, delta := isConstDelta(w); x != nil && delta == -1 {
   439  			// v > x-1  ⇒  v >= x
   440  			ft.update(parent, v, x, d, gt|eq)
   441  		}
   442  	case gt | eq:
   443  		if x, delta := isConstDelta(v); x != nil && delta == -1 {
   444  			// x-1 >= w && x > min  ⇒  x > w
   445  			//
   446  			// Useful for i > 0; s[i-1].
   447  			lim, ok := ft.limits[x.ID]
   448  			if ok && ((d == signed && lim.min > opMin[v.Op]) || (d == unsigned && lim.umin > 0)) {
   449  				ft.update(parent, x, w, d, gt)
   450  			}
   451  		} else if x, delta := isConstDelta(w); x != nil && delta == 1 {
   452  			// v >= x+1 && x < max  ⇒  v > x
   453  			lim, ok := ft.limits[x.ID]
   454  			if ok && ((d == signed && lim.max < opMax[w.Op]) || (d == unsigned && lim.umax < opUMax[w.Op])) {
   455  				ft.update(parent, v, x, d, gt)
   456  			}
   457  		}
   458  	}
   459  
   460  	// Process: x+delta > w (with delta constant)
   461  	// Only signed domain for now (useful for accesses to slices in loops).
   462  	if r == gt || r == gt|eq {
   463  		if x, delta := isConstDelta(v); x != nil && d == signed {
   464  			if parent.Func.pass.debug > 1 {
   465  				parent.Func.Warnl(parent.Pos, "x+d %s w; x:%v %v delta:%v w:%v d:%v", r, x, parent.String(), delta, w.AuxInt, d)
   466  			}
   467  			underflow := true
   468  			if l, has := ft.limits[x.ID]; has && delta < 0 {
   469  				if (x.Type.Size() == 8 && l.min >= math.MinInt64-delta) ||
   470  					(x.Type.Size() == 4 && l.min >= math.MinInt32-delta) {
   471  					underflow = false
   472  				}
   473  			}
   474  			if delta < 0 && !underflow {
   475  				// If delta < 0 and x+delta cannot underflow then x > x+delta (that is, x > v)
   476  				ft.update(parent, x, v, signed, gt)
   477  			}
   478  			if !w.isGenericIntConst() {
   479  				// If we know that x+delta > w but w is not constant, we can derive:
   480  				//    if delta < 0 and x+delta cannot underflow, then x > w
   481  				// This is useful for loops with bounds "len(slice)-K" (delta = -K)
   482  				if delta < 0 && !underflow {
   483  					ft.update(parent, x, w, signed, r)
   484  				}
   485  			} else {
   486  				// With w,delta constants, we want to derive: x+delta > w  ⇒  x > w-delta
   487  				//
   488  				// We compute (using integers of the correct size):
   489  				//    min = w - delta
   490  				//    max = MaxInt - delta
   491  				//
   492  				// And we prove that:
   493  				//    if min<max: min < x AND x <= max
   494  				//    if min>max: min < x OR  x <= max
   495  				//
   496  				// This is always correct, even in case of overflow.
   497  				//
   498  				// If the initial fact is x+delta >= w instead, the derived conditions are:
   499  				//    if min<max: min <= x AND x <= max
   500  				//    if min>max: min <= x OR  x <= max
   501  				//
   502  				// Notice the conditions for max are still <=, as they handle overflows.
   503  				var min, max int64
   504  				var vmin, vmax *Value
   505  				switch x.Type.Size() {
   506  				case 8:
   507  					min = w.AuxInt - delta
   508  					max = int64(^uint64(0)>>1) - delta
   509  
   510  					vmin = parent.NewValue0I(parent.Pos, OpConst64, parent.Func.Config.Types.Int64, min)
   511  					vmax = parent.NewValue0I(parent.Pos, OpConst64, parent.Func.Config.Types.Int64, max)
   512  
   513  				case 4:
   514  					min = int64(int32(w.AuxInt) - int32(delta))
   515  					max = int64(int32(^uint32(0)>>1) - int32(delta))
   516  
   517  					vmin = parent.NewValue0I(parent.Pos, OpConst32, parent.Func.Config.Types.Int32, min)
   518  					vmax = parent.NewValue0I(parent.Pos, OpConst32, parent.Func.Config.Types.Int32, max)
   519  
   520  				case 2:
   521  					min = int64(int16(w.AuxInt) - int16(delta))
   522  					max = int64(int16(^uint16(0)>>1) - int16(delta))
   523  
   524  					vmin = parent.NewValue0I(parent.Pos, OpConst16, parent.Func.Config.Types.Int16, min)
   525  					vmax = parent.NewValue0I(parent.Pos, OpConst16, parent.Func.Config.Types.Int16, max)
   526  
   527  				case 1:
   528  					min = int64(int8(w.AuxInt) - int8(delta))
   529  					max = int64(int8(^uint8(0)>>1) - int8(delta))
   530  
   531  					vmin = parent.NewValue0I(parent.Pos, OpConst8, parent.Func.Config.Types.Int8, min)
   532  					vmax = parent.NewValue0I(parent.Pos, OpConst8, parent.Func.Config.Types.Int8, max)
   533  
   534  				default:
   535  					panic("unimplemented")
   536  				}
   537  
   538  				if min < max {
   539  					// Record that x > min and max >= x
   540  					ft.update(parent, x, vmin, d, r)
   541  					ft.update(parent, vmax, x, d, r|eq)
   542  				} else {
   543  					// We know that either x>min OR x<=max. factsTable cannot record OR conditions,
   544  					// so let's see if we can already prove that one of them is false, in which case
   545  					// the other must be true
   546  					if l, has := ft.limits[x.ID]; has {
   547  						if l.max <= min {
   548  							if r&eq == 0 || l.max < min {
   549  								// x>min (x>=min) is impossible, so it must be x<=max
   550  								ft.update(parent, vmax, x, d, r|eq)
   551  							}
   552  						} else if l.min > max {
   553  							// x<=max is impossible, so it must be x>min
   554  							ft.update(parent, x, vmin, d, r)
   555  						}
   556  					}
   557  				}
   558  			}
   559  		}
   560  	}
   561  
   562  	// Look through value-preserving extensions.
   563  	// If the domain is appropriate for the pre-extension Type,
   564  	// repeat the update with the pre-extension Value.
   565  	if isCleanExt(v) {
   566  		switch {
   567  		case d == signed && v.Args[0].Type.IsSigned():
   568  			fallthrough
   569  		case d == unsigned && !v.Args[0].Type.IsSigned():
   570  			ft.update(parent, v.Args[0], w, d, r)
   571  		}
   572  	}
   573  	if isCleanExt(w) {
   574  		switch {
   575  		case d == signed && w.Args[0].Type.IsSigned():
   576  			fallthrough
   577  		case d == unsigned && !w.Args[0].Type.IsSigned():
   578  			ft.update(parent, v, w.Args[0], d, r)
   579  		}
   580  	}
   581  }
   582  
   583  var opMin = map[Op]int64{
   584  	OpAdd64: math.MinInt64, OpSub64: math.MinInt64,
   585  	OpAdd32: math.MinInt32, OpSub32: math.MinInt32,
   586  }
   587  
   588  var opMax = map[Op]int64{
   589  	OpAdd64: math.MaxInt64, OpSub64: math.MaxInt64,
   590  	OpAdd32: math.MaxInt32, OpSub32: math.MaxInt32,
   591  }
   592  
   593  var opUMax = map[Op]uint64{
   594  	OpAdd64: math.MaxUint64, OpSub64: math.MaxUint64,
   595  	OpAdd32: math.MaxUint32, OpSub32: math.MaxUint32,
   596  }
   597  
   598  // isNonNegative reports whether v is known to be non-negative.
   599  func (ft *factsTable) isNonNegative(v *Value) bool {
   600  	if isNonNegative(v) {
   601  		return true
   602  	}
   603  
   604  	var max int64
   605  	switch v.Type.Size() {
   606  	case 1:
   607  		max = math.MaxInt8
   608  	case 2:
   609  		max = math.MaxInt16
   610  	case 4:
   611  		max = math.MaxInt32
   612  	case 8:
   613  		max = math.MaxInt64
   614  	default:
   615  		panic("unexpected integer size")
   616  	}
   617  
   618  	// Check if the recorded limits can prove that the value is positive
   619  
   620  	if l, has := ft.limits[v.ID]; has && (l.min >= 0 || l.umax <= uint64(max)) {
   621  		return true
   622  	}
   623  
   624  	// Check if v = x+delta, and we can use x's limits to prove that it's positive
   625  	if x, delta := isConstDelta(v); x != nil {
   626  		if l, has := ft.limits[x.ID]; has {
   627  			if delta > 0 && l.min >= -delta && l.max <= max-delta {
   628  				return true
   629  			}
   630  			if delta < 0 && l.min >= -delta {
   631  				return true
   632  			}
   633  		}
   634  	}
   635  
   636  	// Check if v is a value-preserving extension of a non-negative value.
   637  	if isCleanExt(v) && ft.isNonNegative(v.Args[0]) {
   638  		return true
   639  	}
   640  
   641  	// Check if the signed poset can prove that the value is >= 0
   642  	return ft.orderS.OrderedOrEqual(ft.zero, v)
   643  }
   644  
   645  // checkpoint saves the current state of known relations.
   646  // Called when descending on a branch.
   647  func (ft *factsTable) checkpoint() {
   648  	if ft.unsat {
   649  		ft.unsatDepth++
   650  	}
   651  	ft.stack = append(ft.stack, checkpointFact)
   652  	ft.limitStack = append(ft.limitStack, checkpointBound)
   653  	ft.orderS.Checkpoint()
   654  	ft.orderU.Checkpoint()
   655  }
   656  
   657  // restore restores known relation to the state just
   658  // before the previous checkpoint.
   659  // Called when backing up on a branch.
   660  func (ft *factsTable) restore() {
   661  	if ft.unsatDepth > 0 {
   662  		ft.unsatDepth--
   663  	} else {
   664  		ft.unsat = false
   665  	}
   666  	for {
   667  		old := ft.stack[len(ft.stack)-1]
   668  		ft.stack = ft.stack[:len(ft.stack)-1]
   669  		if old == checkpointFact {
   670  			break
   671  		}
   672  		if old.r == lt|eq|gt {
   673  			delete(ft.facts, old.p)
   674  		} else {
   675  			ft.facts[old.p] = old.r
   676  		}
   677  	}
   678  	for {
   679  		old := ft.limitStack[len(ft.limitStack)-1]
   680  		ft.limitStack = ft.limitStack[:len(ft.limitStack)-1]
   681  		if old.vid == 0 { // checkpointBound
   682  			break
   683  		}
   684  		if old.limit == noLimit {
   685  			delete(ft.limits, old.vid)
   686  		} else {
   687  			ft.limits[old.vid] = old.limit
   688  		}
   689  	}
   690  	ft.orderS.Undo()
   691  	ft.orderU.Undo()
   692  }
   693  
   694  func lessByID(v, w *Value) bool {
   695  	if v == nil && w == nil {
   696  		// Should not happen, but just in case.
   697  		return false
   698  	}
   699  	if v == nil {
   700  		return true
   701  	}
   702  	return w != nil && v.ID < w.ID
   703  }
   704  
   705  var (
   706  	reverseBits = [...]relation{0, 4, 2, 6, 1, 5, 3, 7}
   707  
   708  	// maps what we learn when the positive branch is taken.
   709  	// For example:
   710  	//      OpLess8:   {signed, lt},
   711  	//	v1 = (OpLess8 v2 v3).
   712  	// If v1 branch is taken then we learn that the rangeMask
   713  	// can be at most lt.
   714  	domainRelationTable = map[Op]struct {
   715  		d domain
   716  		r relation
   717  	}{
   718  		OpEq8:   {signed | unsigned, eq},
   719  		OpEq16:  {signed | unsigned, eq},
   720  		OpEq32:  {signed | unsigned, eq},
   721  		OpEq64:  {signed | unsigned, eq},
   722  		OpEqPtr: {pointer, eq},
   723  
   724  		OpNeq8:   {signed | unsigned, lt | gt},
   725  		OpNeq16:  {signed | unsigned, lt | gt},
   726  		OpNeq32:  {signed | unsigned, lt | gt},
   727  		OpNeq64:  {signed | unsigned, lt | gt},
   728  		OpNeqPtr: {pointer, lt | gt},
   729  
   730  		OpLess8:   {signed, lt},
   731  		OpLess8U:  {unsigned, lt},
   732  		OpLess16:  {signed, lt},
   733  		OpLess16U: {unsigned, lt},
   734  		OpLess32:  {signed, lt},
   735  		OpLess32U: {unsigned, lt},
   736  		OpLess64:  {signed, lt},
   737  		OpLess64U: {unsigned, lt},
   738  
   739  		OpLeq8:   {signed, lt | eq},
   740  		OpLeq8U:  {unsigned, lt | eq},
   741  		OpLeq16:  {signed, lt | eq},
   742  		OpLeq16U: {unsigned, lt | eq},
   743  		OpLeq32:  {signed, lt | eq},
   744  		OpLeq32U: {unsigned, lt | eq},
   745  		OpLeq64:  {signed, lt | eq},
   746  		OpLeq64U: {unsigned, lt | eq},
   747  
   748  		// For these ops, the negative branch is different: we can only
   749  		// prove signed/GE (signed/GT) if we can prove that arg0 is non-negative.
   750  		// See the special case in addBranchRestrictions.
   751  		OpIsInBounds:      {signed | unsigned, lt},      // 0 <= arg0 < arg1
   752  		OpIsSliceInBounds: {signed | unsigned, lt | eq}, // 0 <= arg0 <= arg1
   753  	}
   754  )
   755  
   756  // cleanup returns the posets to the free list
   757  func (ft *factsTable) cleanup(f *Func) {
   758  	for _, po := range []*poset{ft.orderS, ft.orderU} {
   759  		// Make sure it's empty as it should be. A non-empty poset
   760  		// might cause errors and miscompilations if reused.
   761  		if checkEnabled {
   762  			if err := po.CheckEmpty(); err != nil {
   763  				f.Fatalf("poset not empty after function %s: %v", f.Name, err)
   764  			}
   765  		}
   766  		f.retPoset(po)
   767  	}
   768  }
   769  
   770  // prove removes redundant BlockIf branches that can be inferred
   771  // from previous dominating comparisons.
   772  //
   773  // By far, the most common redundant pair are generated by bounds checking.
   774  // For example for the code:
   775  //
   776  //	a[i] = 4
   777  //	foo(a[i])
   778  //
   779  // The compiler will generate the following code:
   780  //
   781  //	if i >= len(a) {
   782  //	    panic("not in bounds")
   783  //	}
   784  //	a[i] = 4
   785  //	if i >= len(a) {
   786  //	    panic("not in bounds")
   787  //	}
   788  //	foo(a[i])
   789  //
   790  // The second comparison i >= len(a) is clearly redundant because if the
   791  // else branch of the first comparison is executed, we already know that i < len(a).
   792  // The code for the second panic can be removed.
   793  //
   794  // prove works by finding contradictions and trimming branches whose
   795  // conditions are unsatisfiable given the branches leading up to them.
   796  // It tracks a "fact table" of branch conditions. For each branching
   797  // block, it asserts the branch conditions that uniquely dominate that
   798  // block, and then separately asserts the block's branch condition and
   799  // its negation. If either leads to a contradiction, it can trim that
   800  // successor.
   801  func prove(f *Func) {
   802  	// Find induction variables. Currently, findIndVars
   803  	// is limited to one induction variable per block.
   804  	var indVars map[*Block]indVar
   805  	for _, v := range findIndVar(f) {
   806  		ind := v.ind
   807  		if len(ind.Args) != 2 {
   808  			// the rewrite code assumes there is only ever two parents to loops
   809  			panic("unexpected induction with too many parents")
   810  		}
   811  
   812  		nxt := v.nxt
   813  		if !(ind.Uses == 2 && // 2 used by comparison and next
   814  			nxt.Uses == 1) { // 1 used by induction
   815  			// ind or nxt is used inside the loop, add it for the facts table
   816  			if indVars == nil {
   817  				indVars = make(map[*Block]indVar)
   818  			}
   819  			indVars[v.entry] = v
   820  			continue
   821  		} else {
   822  			// Since this induction variable is not used for anything but counting the iterations,
   823  			// no point in putting it into the facts table.
   824  		}
   825  
   826  		// try to rewrite to a downward counting loop checking against start if the
   827  		// loop body does not depends on ind or nxt and end is known before the loop.
   828  		// This reduce pressure on the register allocator because this do not need
   829  		// to use end on each iteration anymore. We compare against the start constant instead.
   830  		// That means this code:
   831  		//
   832  		//	loop:
   833  		//		ind = (Phi (Const [x]) nxt),
   834  		//		if ind < end
   835  		//		then goto enter_loop
   836  		//		else goto exit_loop
   837  		//
   838  		//	enter_loop:
   839  		//		do something without using ind nor nxt
   840  		//		nxt = inc + ind
   841  		//		goto loop
   842  		//
   843  		//	exit_loop:
   844  		//
   845  		// is rewritten to:
   846  		//
   847  		//	loop:
   848  		//		ind = (Phi end nxt)
   849  		//		if (Const [x]) < ind
   850  		//		then goto enter_loop
   851  		//		else goto exit_loop
   852  		//
   853  		//	enter_loop:
   854  		//		do something without using ind nor nxt
   855  		//		nxt = ind - inc
   856  		//		goto loop
   857  		//
   858  		//	exit_loop:
   859  		//
   860  		// this is better because it only require to keep ind then nxt alive while looping,
   861  		// while the original form keeps ind then nxt and end alive
   862  		start, end := v.min, v.max
   863  		if v.flags&indVarCountDown != 0 {
   864  			start, end = end, start
   865  		}
   866  
   867  		if !(start.Op == OpConst8 || start.Op == OpConst16 || start.Op == OpConst32 || start.Op == OpConst64) {
   868  			// if start is not a constant we would be winning nothing from inverting the loop
   869  			continue
   870  		}
   871  		if end.Op == OpConst8 || end.Op == OpConst16 || end.Op == OpConst32 || end.Op == OpConst64 {
   872  			// TODO: if both start and end are constants we should rewrite such that the comparison
   873  			// is against zero and nxt is ++ or -- operation
   874  			// That means:
   875  			//	for i := 2; i < 11; i += 2 {
   876  			// should be rewritten to:
   877  			//	for i := 5; 0 < i; i-- {
   878  			continue
   879  		}
   880  
   881  		if end.Block == ind.Block {
   882  			// we can't rewrite loops where the condition depends on the loop body
   883  			// this simple check is forced to work because if this is true a Phi in ind.Block must exists
   884  			continue
   885  		}
   886  
   887  		check := ind.Block.Controls[0]
   888  		// invert the check
   889  		check.Args[0], check.Args[1] = check.Args[1], check.Args[0]
   890  
   891  		// swap start and end in the loop
   892  		for i, v := range check.Args {
   893  			if v != end {
   894  				continue
   895  			}
   896  
   897  			check.SetArg(i, start)
   898  			goto replacedEnd
   899  		}
   900  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
   901  	replacedEnd:
   902  
   903  		for i, v := range ind.Args {
   904  			if v != start {
   905  				continue
   906  			}
   907  
   908  			ind.SetArg(i, end)
   909  			goto replacedStart
   910  		}
   911  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
   912  	replacedStart:
   913  
   914  		if nxt.Args[0] != ind {
   915  			// unlike additions subtractions are not commutative so be sure we get it right
   916  			nxt.Args[0], nxt.Args[1] = nxt.Args[1], nxt.Args[0]
   917  		}
   918  
   919  		switch nxt.Op {
   920  		case OpAdd8:
   921  			nxt.Op = OpSub8
   922  		case OpAdd16:
   923  			nxt.Op = OpSub16
   924  		case OpAdd32:
   925  			nxt.Op = OpSub32
   926  		case OpAdd64:
   927  			nxt.Op = OpSub64
   928  		case OpSub8:
   929  			nxt.Op = OpAdd8
   930  		case OpSub16:
   931  			nxt.Op = OpAdd16
   932  		case OpSub32:
   933  			nxt.Op = OpAdd32
   934  		case OpSub64:
   935  			nxt.Op = OpAdd64
   936  		default:
   937  			panic("unreachable")
   938  		}
   939  
   940  		if f.pass.debug > 0 {
   941  			f.Warnl(ind.Pos, "Inverted loop iteration")
   942  		}
   943  	}
   944  
   945  	ft := newFactsTable(f)
   946  	ft.checkpoint()
   947  
   948  	var lensVars map[*Block][]*Value
   949  	var logicVars map[*Block][]*Value
   950  
   951  	// Find length and capacity ops.
   952  	for _, b := range f.Blocks {
   953  		for _, v := range b.Values {
   954  			if v.Uses == 0 {
   955  				// We don't care about dead values.
   956  				// (There can be some that are CSEd but not removed yet.)
   957  				continue
   958  			}
   959  			switch v.Op {
   960  			case OpStringLen:
   961  				ft.update(b, v, ft.zero, signed, gt|eq)
   962  			case OpSliceLen:
   963  				if ft.lens == nil {
   964  					ft.lens = map[ID]*Value{}
   965  				}
   966  				// Set all len Values for the same slice as equal in the poset.
   967  				// The poset handles transitive relations, so Values related to
   968  				// any OpSliceLen for this slice will be correctly related to others.
   969  				if l, ok := ft.lens[v.Args[0].ID]; ok {
   970  					ft.update(b, v, l, signed, eq)
   971  				} else {
   972  					ft.lens[v.Args[0].ID] = v
   973  				}
   974  				ft.update(b, v, ft.zero, signed, gt|eq)
   975  				if v.Args[0].Op == OpSliceMake {
   976  					if lensVars == nil {
   977  						lensVars = make(map[*Block][]*Value)
   978  					}
   979  					lensVars[b] = append(lensVars[b], v)
   980  				}
   981  			case OpSliceCap:
   982  				if ft.caps == nil {
   983  					ft.caps = map[ID]*Value{}
   984  				}
   985  				// Same as case OpSliceLen above, but for slice cap.
   986  				if c, ok := ft.caps[v.Args[0].ID]; ok {
   987  					ft.update(b, v, c, signed, eq)
   988  				} else {
   989  					ft.caps[v.Args[0].ID] = v
   990  				}
   991  				ft.update(b, v, ft.zero, signed, gt|eq)
   992  				if v.Args[0].Op == OpSliceMake {
   993  					if lensVars == nil {
   994  						lensVars = make(map[*Block][]*Value)
   995  					}
   996  					lensVars[b] = append(lensVars[b], v)
   997  				}
   998  			case OpCtz64, OpCtz32, OpCtz16, OpCtz8, OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
   999  				ft.update(b, v, ft.zero, signed, gt|eq)
  1000  				// TODO: we could also do <= 64/32/16/8, if that helped.
  1001  			case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  1002  				ft.update(b, v, v.Args[1], unsigned, lt|eq)
  1003  				ft.update(b, v, v.Args[0], unsigned, lt|eq)
  1004  				for i := 0; i < 2; i++ {
  1005  					if isNonNegative(v.Args[i]) {
  1006  						ft.update(b, v, v.Args[i], signed, lt|eq)
  1007  						ft.update(b, v, ft.zero, signed, gt|eq)
  1008  					}
  1009  				}
  1010  				if logicVars == nil {
  1011  					logicVars = make(map[*Block][]*Value)
  1012  				}
  1013  				logicVars[b] = append(logicVars[b], v)
  1014  			case OpOr64, OpOr32, OpOr16, OpOr8:
  1015  				// TODO: investigate how to always add facts without much slowdown, see issue #57959.
  1016  				if v.Args[0].isGenericIntConst() {
  1017  					ft.update(b, v, v.Args[0], unsigned, gt|eq)
  1018  				}
  1019  				if v.Args[1].isGenericIntConst() {
  1020  					ft.update(b, v, v.Args[1], unsigned, gt|eq)
  1021  				}
  1022  			case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u,
  1023  				OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8,
  1024  				OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  1025  				OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  1026  				OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8:
  1027  				ft.update(b, v, v.Args[0], unsigned, lt|eq)
  1028  			case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  1029  				ft.update(b, v, v.Args[0], unsigned, lt|eq)
  1030  				ft.update(b, v, v.Args[1], unsigned, lt)
  1031  			case OpPhi:
  1032  				// Determine the min and max value of OpPhi composed entirely of integer constants.
  1033  				//
  1034  				// For example, for an OpPhi:
  1035  				//
  1036  				// v1 = OpConst64 [13]
  1037  				// v2 = OpConst64 [7]
  1038  				// v3 = OpConst64 [42]
  1039  				//
  1040  				// v4 = OpPhi(v1, v2, v3)
  1041  				//
  1042  				// We can prove:
  1043  				//
  1044  				// v4 >= 7 && v4 <= 42
  1045  				//
  1046  				// TODO(jake-ciolek): Handle nested constant OpPhi's
  1047  				sameConstOp := true
  1048  				min := 0
  1049  				max := 0
  1050  
  1051  				if !v.Args[min].isGenericIntConst() {
  1052  					break
  1053  				}
  1054  
  1055  				for k := range v.Args {
  1056  					if v.Args[k].Op != v.Args[min].Op {
  1057  						sameConstOp = false
  1058  						break
  1059  					}
  1060  					if v.Args[k].AuxInt < v.Args[min].AuxInt {
  1061  						min = k
  1062  					}
  1063  					if v.Args[k].AuxInt > v.Args[max].AuxInt {
  1064  						max = k
  1065  					}
  1066  				}
  1067  
  1068  				if sameConstOp {
  1069  					ft.update(b, v, v.Args[min], signed, gt|eq)
  1070  					ft.update(b, v, v.Args[max], signed, lt|eq)
  1071  				}
  1072  				// One might be tempted to create a v >= ft.zero relation for
  1073  				// all OpPhi's composed of only provably-positive values
  1074  				// but that bloats up the facts table for a very negligible gain.
  1075  				// In Go itself, very few functions get improved (< 5) at a cost of 5-7% total increase
  1076  				// of compile time.
  1077  			}
  1078  		}
  1079  	}
  1080  
  1081  	// current node state
  1082  	type walkState int
  1083  	const (
  1084  		descend walkState = iota
  1085  		simplify
  1086  	)
  1087  	// work maintains the DFS stack.
  1088  	type bp struct {
  1089  		block *Block    // current handled block
  1090  		state walkState // what's to do
  1091  	}
  1092  	work := make([]bp, 0, 256)
  1093  	work = append(work, bp{
  1094  		block: f.Entry,
  1095  		state: descend,
  1096  	})
  1097  
  1098  	idom := f.Idom()
  1099  	sdom := f.Sdom()
  1100  
  1101  	// DFS on the dominator tree.
  1102  	//
  1103  	// For efficiency, we consider only the dominator tree rather
  1104  	// than the entire flow graph. On the way down, we consider
  1105  	// incoming branches and accumulate conditions that uniquely
  1106  	// dominate the current block. If we discover a contradiction,
  1107  	// we can eliminate the entire block and all of its children.
  1108  	// On the way back up, we consider outgoing branches that
  1109  	// haven't already been considered. This way we consider each
  1110  	// branch condition only once.
  1111  	for len(work) > 0 {
  1112  		node := work[len(work)-1]
  1113  		work = work[:len(work)-1]
  1114  		parent := idom[node.block.ID]
  1115  		branch := getBranch(sdom, parent, node.block)
  1116  
  1117  		switch node.state {
  1118  		case descend:
  1119  			ft.checkpoint()
  1120  
  1121  			// Entering the block, add the block-depending facts that we collected
  1122  			// at the beginning: induction variables and lens/caps of slices.
  1123  			if iv, ok := indVars[node.block]; ok {
  1124  				addIndVarRestrictions(ft, parent, iv)
  1125  			}
  1126  			if lens, ok := lensVars[node.block]; ok {
  1127  				for _, v := range lens {
  1128  					switch v.Op {
  1129  					case OpSliceLen:
  1130  						ft.update(node.block, v, v.Args[0].Args[1], signed, eq)
  1131  					case OpSliceCap:
  1132  						ft.update(node.block, v, v.Args[0].Args[2], signed, eq)
  1133  					}
  1134  				}
  1135  			}
  1136  
  1137  			if branch != unknown {
  1138  				addBranchRestrictions(ft, parent, branch)
  1139  				// After we add the branch restriction, re-check the logic operations in the parent block,
  1140  				// it may give us more info to omit some branches
  1141  				if logic, ok := logicVars[parent]; ok {
  1142  					for _, v := range logic {
  1143  						// we only have OpAnd for now
  1144  						ft.update(parent, v, v.Args[1], unsigned, lt|eq)
  1145  						ft.update(parent, v, v.Args[0], unsigned, lt|eq)
  1146  						for i := 0; i < 2; i++ {
  1147  							if isNonNegative(v.Args[i]) {
  1148  								ft.update(parent, v, v.Args[i], signed, lt|eq)
  1149  								ft.update(parent, v, ft.zero, signed, gt|eq)
  1150  							}
  1151  						}
  1152  					}
  1153  				}
  1154  				if ft.unsat {
  1155  					// node.block is unreachable.
  1156  					// Remove it and don't visit
  1157  					// its children.
  1158  					removeBranch(parent, branch)
  1159  					ft.restore()
  1160  					break
  1161  				}
  1162  				// Otherwise, we can now commit to
  1163  				// taking this branch. We'll restore
  1164  				// ft when we unwind.
  1165  			}
  1166  
  1167  			// Add inductive facts for phis in this block.
  1168  			addLocalInductiveFacts(ft, node.block)
  1169  
  1170  			work = append(work, bp{
  1171  				block: node.block,
  1172  				state: simplify,
  1173  			})
  1174  			for s := sdom.Child(node.block); s != nil; s = sdom.Sibling(s) {
  1175  				work = append(work, bp{
  1176  					block: s,
  1177  					state: descend,
  1178  				})
  1179  			}
  1180  
  1181  		case simplify:
  1182  			simplifyBlock(sdom, ft, node.block)
  1183  			ft.restore()
  1184  		}
  1185  	}
  1186  
  1187  	ft.restore()
  1188  
  1189  	ft.cleanup(f)
  1190  }
  1191  
  1192  // getBranch returns the range restrictions added by p
  1193  // when reaching b. p is the immediate dominator of b.
  1194  func getBranch(sdom SparseTree, p *Block, b *Block) branch {
  1195  	if p == nil {
  1196  		return unknown
  1197  	}
  1198  	switch p.Kind {
  1199  	case BlockIf:
  1200  		// If p and p.Succs[0] are dominators it means that every path
  1201  		// from entry to b passes through p and p.Succs[0]. We care that
  1202  		// no path from entry to b passes through p.Succs[1]. If p.Succs[0]
  1203  		// has one predecessor then (apart from the degenerate case),
  1204  		// there is no path from entry that can reach b through p.Succs[1].
  1205  		// TODO: how about p->yes->b->yes, i.e. a loop in yes.
  1206  		if sdom.IsAncestorEq(p.Succs[0].b, b) && len(p.Succs[0].b.Preds) == 1 {
  1207  			return positive
  1208  		}
  1209  		if sdom.IsAncestorEq(p.Succs[1].b, b) && len(p.Succs[1].b.Preds) == 1 {
  1210  			return negative
  1211  		}
  1212  	case BlockJumpTable:
  1213  		// TODO: this loop can lead to quadratic behavior, as
  1214  		// getBranch can be called len(p.Succs) times.
  1215  		for i, e := range p.Succs {
  1216  			if sdom.IsAncestorEq(e.b, b) && len(e.b.Preds) == 1 {
  1217  				return jumpTable0 + branch(i)
  1218  			}
  1219  		}
  1220  	}
  1221  	return unknown
  1222  }
  1223  
  1224  // addIndVarRestrictions updates the factsTables ft with the facts
  1225  // learned from the induction variable indVar which drives the loop
  1226  // starting in Block b.
  1227  func addIndVarRestrictions(ft *factsTable, b *Block, iv indVar) {
  1228  	d := signed
  1229  	if ft.isNonNegative(iv.min) && ft.isNonNegative(iv.max) {
  1230  		d |= unsigned
  1231  	}
  1232  
  1233  	if iv.flags&indVarMinExc == 0 {
  1234  		addRestrictions(b, ft, d, iv.min, iv.ind, lt|eq)
  1235  	} else {
  1236  		addRestrictions(b, ft, d, iv.min, iv.ind, lt)
  1237  	}
  1238  
  1239  	if iv.flags&indVarMaxInc == 0 {
  1240  		addRestrictions(b, ft, d, iv.ind, iv.max, lt)
  1241  	} else {
  1242  		addRestrictions(b, ft, d, iv.ind, iv.max, lt|eq)
  1243  	}
  1244  }
  1245  
  1246  // addBranchRestrictions updates the factsTables ft with the facts learned when
  1247  // branching from Block b in direction br.
  1248  func addBranchRestrictions(ft *factsTable, b *Block, br branch) {
  1249  	c := b.Controls[0]
  1250  	switch {
  1251  	case br == negative:
  1252  		addRestrictions(b, ft, boolean, nil, c, eq)
  1253  	case br == positive:
  1254  		addRestrictions(b, ft, boolean, nil, c, lt|gt)
  1255  	case br >= jumpTable0:
  1256  		idx := br - jumpTable0
  1257  		val := int64(idx)
  1258  		if v, off := isConstDelta(c); v != nil {
  1259  			// Establish the bound on the underlying value we're switching on,
  1260  			// not on the offset-ed value used as the jump table index.
  1261  			c = v
  1262  			val -= off
  1263  		}
  1264  		old, ok := ft.limits[c.ID]
  1265  		if !ok {
  1266  			old = noLimit
  1267  		}
  1268  		ft.limitStack = append(ft.limitStack, limitFact{c.ID, old})
  1269  		if val < old.min || val > old.max || uint64(val) < old.umin || uint64(val) > old.umax {
  1270  			ft.unsat = true
  1271  			if b.Func.pass.debug > 2 {
  1272  				b.Func.Warnl(b.Pos, "block=%s outedge=%d %s=%d unsat", b, idx, c, val)
  1273  			}
  1274  		} else {
  1275  			ft.limits[c.ID] = limit{val, val, uint64(val), uint64(val)}
  1276  			if b.Func.pass.debug > 2 {
  1277  				b.Func.Warnl(b.Pos, "block=%s outedge=%d %s=%d", b, idx, c, val)
  1278  			}
  1279  		}
  1280  	default:
  1281  		panic("unknown branch")
  1282  	}
  1283  	if tr, has := domainRelationTable[c.Op]; has {
  1284  		// When we branched from parent we learned a new set of
  1285  		// restrictions. Update the factsTable accordingly.
  1286  		d := tr.d
  1287  		if d == signed && ft.isNonNegative(c.Args[0]) && ft.isNonNegative(c.Args[1]) {
  1288  			d |= unsigned
  1289  		}
  1290  		switch c.Op {
  1291  		case OpIsInBounds, OpIsSliceInBounds:
  1292  			// 0 <= a0 < a1 (or 0 <= a0 <= a1)
  1293  			//
  1294  			// On the positive branch, we learn:
  1295  			//   signed: 0 <= a0 < a1 (or 0 <= a0 <= a1)
  1296  			//   unsigned:    a0 < a1 (or a0 <= a1)
  1297  			//
  1298  			// On the negative branch, we learn (0 > a0 ||
  1299  			// a0 >= a1). In the unsigned domain, this is
  1300  			// simply a0 >= a1 (which is the reverse of the
  1301  			// positive branch, so nothing surprising).
  1302  			// But in the signed domain, we can't express the ||
  1303  			// condition, so check if a0 is non-negative instead,
  1304  			// to be able to learn something.
  1305  			switch br {
  1306  			case negative:
  1307  				d = unsigned
  1308  				if ft.isNonNegative(c.Args[0]) {
  1309  					d |= signed
  1310  				}
  1311  				addRestrictions(b, ft, d, c.Args[0], c.Args[1], tr.r^(lt|gt|eq))
  1312  			case positive:
  1313  				addRestrictions(b, ft, signed, ft.zero, c.Args[0], lt|eq)
  1314  				addRestrictions(b, ft, d, c.Args[0], c.Args[1], tr.r)
  1315  			}
  1316  		default:
  1317  			switch br {
  1318  			case negative:
  1319  				addRestrictions(b, ft, d, c.Args[0], c.Args[1], tr.r^(lt|gt|eq))
  1320  			case positive:
  1321  				addRestrictions(b, ft, d, c.Args[0], c.Args[1], tr.r)
  1322  			}
  1323  		}
  1324  
  1325  	}
  1326  }
  1327  
  1328  // addRestrictions updates restrictions from the immediate
  1329  // dominating block (p) using r.
  1330  func addRestrictions(parent *Block, ft *factsTable, t domain, v, w *Value, r relation) {
  1331  	if t == 0 {
  1332  		// Trivial case: nothing to do.
  1333  		// Should not happen, but just in case.
  1334  		return
  1335  	}
  1336  	for i := domain(1); i <= t; i <<= 1 {
  1337  		if t&i == 0 {
  1338  			continue
  1339  		}
  1340  		ft.update(parent, v, w, i, r)
  1341  	}
  1342  }
  1343  
  1344  // addLocalInductiveFacts adds inductive facts when visiting b, where
  1345  // b is a join point in a loop. In contrast with findIndVar, this
  1346  // depends on facts established for b, which is why it happens when
  1347  // visiting b.
  1348  //
  1349  // TODO: It would be nice to combine this with findIndVar.
  1350  func addLocalInductiveFacts(ft *factsTable, b *Block) {
  1351  	// This looks for a specific pattern of induction:
  1352  	//
  1353  	// 1. i1 = OpPhi(min, i2) in b
  1354  	// 2. i2 = i1 + 1
  1355  	// 3. i2 < max at exit from b.Preds[1]
  1356  	// 4. min < max
  1357  	//
  1358  	// If all of these conditions are true, then i1 < max and i1 >= min.
  1359  
  1360  	// To ensure this is a loop header node.
  1361  	if len(b.Preds) != 2 {
  1362  		return
  1363  	}
  1364  
  1365  	for _, i1 := range b.Values {
  1366  		if i1.Op != OpPhi {
  1367  			continue
  1368  		}
  1369  
  1370  		// Check for conditions 1 and 2. This is easy to do
  1371  		// and will throw out most phis.
  1372  		min, i2 := i1.Args[0], i1.Args[1]
  1373  		if i1q, delta := isConstDelta(i2); i1q != i1 || delta != 1 {
  1374  			continue
  1375  		}
  1376  
  1377  		// Try to prove condition 3. We can't just query the
  1378  		// fact table for this because we don't know what the
  1379  		// facts of b.Preds[1] are (in general, b.Preds[1] is
  1380  		// a loop-back edge, so we haven't even been there
  1381  		// yet). As a conservative approximation, we look for
  1382  		// this condition in the predecessor chain until we
  1383  		// hit a join point.
  1384  		uniquePred := func(b *Block) *Block {
  1385  			if len(b.Preds) == 1 {
  1386  				return b.Preds[0].b
  1387  			}
  1388  			return nil
  1389  		}
  1390  		pred, child := b.Preds[1].b, b
  1391  		for ; pred != nil; pred, child = uniquePred(pred), pred {
  1392  			if pred.Kind != BlockIf {
  1393  				continue
  1394  			}
  1395  			control := pred.Controls[0]
  1396  
  1397  			br := unknown
  1398  			if pred.Succs[0].b == child {
  1399  				br = positive
  1400  			}
  1401  			if pred.Succs[1].b == child {
  1402  				if br != unknown {
  1403  					continue
  1404  				}
  1405  				br = negative
  1406  			}
  1407  			if br == unknown {
  1408  				continue
  1409  			}
  1410  
  1411  			tr, has := domainRelationTable[control.Op]
  1412  			if !has {
  1413  				continue
  1414  			}
  1415  			r := tr.r
  1416  			if br == negative {
  1417  				// Negative branch taken to reach b.
  1418  				// Complement the relations.
  1419  				r = (lt | eq | gt) ^ r
  1420  			}
  1421  
  1422  			// Check for i2 < max or max > i2.
  1423  			var max *Value
  1424  			if r == lt && control.Args[0] == i2 {
  1425  				max = control.Args[1]
  1426  			} else if r == gt && control.Args[1] == i2 {
  1427  				max = control.Args[0]
  1428  			} else {
  1429  				continue
  1430  			}
  1431  
  1432  			// Check condition 4 now that we have a
  1433  			// candidate max. For this we can query the
  1434  			// fact table. We "prove" min < max by showing
  1435  			// that min >= max is unsat. (This may simply
  1436  			// compare two constants; that's fine.)
  1437  			ft.checkpoint()
  1438  			ft.update(b, min, max, tr.d, gt|eq)
  1439  			proved := ft.unsat
  1440  			ft.restore()
  1441  
  1442  			if proved {
  1443  				// We know that min <= i1 < max.
  1444  				if b.Func.pass.debug > 0 {
  1445  					printIndVar(b, i1, min, max, 1, 0)
  1446  				}
  1447  				ft.update(b, min, i1, tr.d, lt|eq)
  1448  				ft.update(b, i1, max, tr.d, lt)
  1449  			}
  1450  		}
  1451  	}
  1452  }
  1453  
  1454  var ctzNonZeroOp = map[Op]Op{OpCtz8: OpCtz8NonZero, OpCtz16: OpCtz16NonZero, OpCtz32: OpCtz32NonZero, OpCtz64: OpCtz64NonZero}
  1455  var mostNegativeDividend = map[Op]int64{
  1456  	OpDiv16: -1 << 15,
  1457  	OpMod16: -1 << 15,
  1458  	OpDiv32: -1 << 31,
  1459  	OpMod32: -1 << 31,
  1460  	OpDiv64: -1 << 63,
  1461  	OpMod64: -1 << 63}
  1462  
  1463  // simplifyBlock simplifies some constant values in b and evaluates
  1464  // branches to non-uniquely dominated successors of b.
  1465  func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) {
  1466  	for _, v := range b.Values {
  1467  		switch v.Op {
  1468  		case OpSlicemask:
  1469  			// Replace OpSlicemask operations in b with constants where possible.
  1470  			x, delta := isConstDelta(v.Args[0])
  1471  			if x == nil {
  1472  				break
  1473  			}
  1474  			// slicemask(x + y)
  1475  			// if x is larger than -y (y is negative), then slicemask is -1.
  1476  			lim, ok := ft.limits[x.ID]
  1477  			if !ok {
  1478  				break
  1479  			}
  1480  			if lim.umin > uint64(-delta) {
  1481  				if v.Args[0].Op == OpAdd64 {
  1482  					v.reset(OpConst64)
  1483  				} else {
  1484  					v.reset(OpConst32)
  1485  				}
  1486  				if b.Func.pass.debug > 0 {
  1487  					b.Func.Warnl(v.Pos, "Proved slicemask not needed")
  1488  				}
  1489  				v.AuxInt = -1
  1490  			}
  1491  		case OpCtz8, OpCtz16, OpCtz32, OpCtz64:
  1492  			// On some architectures, notably amd64, we can generate much better
  1493  			// code for CtzNN if we know that the argument is non-zero.
  1494  			// Capture that information here for use in arch-specific optimizations.
  1495  			x := v.Args[0]
  1496  			lim, ok := ft.limits[x.ID]
  1497  			if !ok {
  1498  				break
  1499  			}
  1500  			if lim.umin > 0 || lim.min > 0 || lim.max < 0 {
  1501  				if b.Func.pass.debug > 0 {
  1502  					b.Func.Warnl(v.Pos, "Proved %v non-zero", v.Op)
  1503  				}
  1504  				v.Op = ctzNonZeroOp[v.Op]
  1505  			}
  1506  		case OpRsh8x8, OpRsh8x16, OpRsh8x32, OpRsh8x64,
  1507  			OpRsh16x8, OpRsh16x16, OpRsh16x32, OpRsh16x64,
  1508  			OpRsh32x8, OpRsh32x16, OpRsh32x32, OpRsh32x64,
  1509  			OpRsh64x8, OpRsh64x16, OpRsh64x32, OpRsh64x64:
  1510  			// Check whether, for a >> b, we know that a is non-negative
  1511  			// and b is all of a's bits except the MSB. If so, a is shifted to zero.
  1512  			bits := 8 * v.Type.Size()
  1513  			if v.Args[1].isGenericIntConst() && v.Args[1].AuxInt >= bits-1 && ft.isNonNegative(v.Args[0]) {
  1514  				if b.Func.pass.debug > 0 {
  1515  					b.Func.Warnl(v.Pos, "Proved %v shifts to zero", v.Op)
  1516  				}
  1517  				switch bits {
  1518  				case 64:
  1519  					v.reset(OpConst64)
  1520  				case 32:
  1521  					v.reset(OpConst32)
  1522  				case 16:
  1523  					v.reset(OpConst16)
  1524  				case 8:
  1525  					v.reset(OpConst8)
  1526  				default:
  1527  					panic("unexpected integer size")
  1528  				}
  1529  				v.AuxInt = 0
  1530  				break // Be sure not to fallthrough - this is no longer OpRsh.
  1531  			}
  1532  			// If the Rsh hasn't been replaced with 0, still check if it is bounded.
  1533  			fallthrough
  1534  		case OpLsh8x8, OpLsh8x16, OpLsh8x32, OpLsh8x64,
  1535  			OpLsh16x8, OpLsh16x16, OpLsh16x32, OpLsh16x64,
  1536  			OpLsh32x8, OpLsh32x16, OpLsh32x32, OpLsh32x64,
  1537  			OpLsh64x8, OpLsh64x16, OpLsh64x32, OpLsh64x64,
  1538  			OpRsh8Ux8, OpRsh8Ux16, OpRsh8Ux32, OpRsh8Ux64,
  1539  			OpRsh16Ux8, OpRsh16Ux16, OpRsh16Ux32, OpRsh16Ux64,
  1540  			OpRsh32Ux8, OpRsh32Ux16, OpRsh32Ux32, OpRsh32Ux64,
  1541  			OpRsh64Ux8, OpRsh64Ux16, OpRsh64Ux32, OpRsh64Ux64:
  1542  			// Check whether, for a << b, we know that b
  1543  			// is strictly less than the number of bits in a.
  1544  			by := v.Args[1]
  1545  			lim, ok := ft.limits[by.ID]
  1546  			if !ok {
  1547  				break
  1548  			}
  1549  			bits := 8 * v.Args[0].Type.Size()
  1550  			if lim.umax < uint64(bits) || (lim.max < bits && ft.isNonNegative(by)) {
  1551  				v.AuxInt = 1 // see shiftIsBounded
  1552  				if b.Func.pass.debug > 0 {
  1553  					b.Func.Warnl(v.Pos, "Proved %v bounded", v.Op)
  1554  				}
  1555  			}
  1556  		case OpDiv16, OpDiv32, OpDiv64, OpMod16, OpMod32, OpMod64:
  1557  			// On amd64 and 386 fix-up code can be avoided if we know
  1558  			//  the divisor is not -1 or the dividend > MinIntNN.
  1559  			// Don't modify AuxInt on other architectures,
  1560  			// as that can interfere with CSE.
  1561  			// TODO: add other architectures?
  1562  			if b.Func.Config.arch != "386" && b.Func.Config.arch != "amd64" {
  1563  				break
  1564  			}
  1565  			divr := v.Args[1]
  1566  			divrLim, divrLimok := ft.limits[divr.ID]
  1567  			divd := v.Args[0]
  1568  			divdLim, divdLimok := ft.limits[divd.ID]
  1569  			if (divrLimok && (divrLim.max < -1 || divrLim.min > -1)) ||
  1570  				(divdLimok && divdLim.min > mostNegativeDividend[v.Op]) {
  1571  				// See DivisionNeedsFixUp in rewrite.go.
  1572  				// v.AuxInt = 1 means we have proved both that the divisor is not -1
  1573  				// and that the dividend is not the most negative integer,
  1574  				// so we do not need to add fix-up code.
  1575  				v.AuxInt = 1
  1576  				if b.Func.pass.debug > 0 {
  1577  					b.Func.Warnl(v.Pos, "Proved %v does not need fix-up", v.Op)
  1578  				}
  1579  			}
  1580  		}
  1581  		// Fold provable constant results.
  1582  		// Helps in cases where we reuse a value after branching on its equality.
  1583  		for i, arg := range v.Args {
  1584  			switch arg.Op {
  1585  			case OpConst64, OpConst32, OpConst16, OpConst8:
  1586  				continue
  1587  			}
  1588  			lim, ok := ft.limits[arg.ID]
  1589  			if !ok {
  1590  				continue
  1591  			}
  1592  
  1593  			var constValue int64
  1594  			typ := arg.Type
  1595  			bits := 8 * typ.Size()
  1596  			switch {
  1597  			case lim.min == lim.max:
  1598  				constValue = lim.min
  1599  			case lim.umin == lim.umax:
  1600  				// truncate then sign extand
  1601  				switch bits {
  1602  				case 64:
  1603  					constValue = int64(lim.umin)
  1604  				case 32:
  1605  					constValue = int64(int32(lim.umin))
  1606  				case 16:
  1607  					constValue = int64(int16(lim.umin))
  1608  				case 8:
  1609  					constValue = int64(int8(lim.umin))
  1610  				default:
  1611  					panic("unexpected integer size")
  1612  				}
  1613  			default:
  1614  				continue
  1615  			}
  1616  			var c *Value
  1617  			f := b.Func
  1618  			switch bits {
  1619  			case 64:
  1620  				c = f.ConstInt64(typ, constValue)
  1621  			case 32:
  1622  				c = f.ConstInt32(typ, int32(constValue))
  1623  			case 16:
  1624  				c = f.ConstInt16(typ, int16(constValue))
  1625  			case 8:
  1626  				c = f.ConstInt8(typ, int8(constValue))
  1627  			default:
  1628  				panic("unexpected integer size")
  1629  			}
  1630  			v.SetArg(i, c)
  1631  			if b.Func.pass.debug > 1 {
  1632  				b.Func.Warnl(v.Pos, "Proved %v's arg %d (%v) is constant %d", v, i, arg, constValue)
  1633  			}
  1634  		}
  1635  	}
  1636  
  1637  	if b.Kind != BlockIf {
  1638  		return
  1639  	}
  1640  
  1641  	// Consider outgoing edges from this block.
  1642  	parent := b
  1643  	for i, branch := range [...]branch{positive, negative} {
  1644  		child := parent.Succs[i].b
  1645  		if getBranch(sdom, parent, child) != unknown {
  1646  			// For edges to uniquely dominated blocks, we
  1647  			// already did this when we visited the child.
  1648  			continue
  1649  		}
  1650  		// For edges to other blocks, this can trim a branch
  1651  		// even if we couldn't get rid of the child itself.
  1652  		ft.checkpoint()
  1653  		addBranchRestrictions(ft, parent, branch)
  1654  		unsat := ft.unsat
  1655  		ft.restore()
  1656  		if unsat {
  1657  			// This branch is impossible, so remove it
  1658  			// from the block.
  1659  			removeBranch(parent, branch)
  1660  			// No point in considering the other branch.
  1661  			// (It *is* possible for both to be
  1662  			// unsatisfiable since the fact table is
  1663  			// incomplete. We could turn this into a
  1664  			// BlockExit, but it doesn't seem worth it.)
  1665  			break
  1666  		}
  1667  	}
  1668  }
  1669  
  1670  func removeBranch(b *Block, branch branch) {
  1671  	c := b.Controls[0]
  1672  	if b.Func.pass.debug > 0 {
  1673  		verb := "Proved"
  1674  		if branch == positive {
  1675  			verb = "Disproved"
  1676  		}
  1677  		if b.Func.pass.debug > 1 {
  1678  			b.Func.Warnl(b.Pos, "%s %s (%s)", verb, c.Op, c)
  1679  		} else {
  1680  			b.Func.Warnl(b.Pos, "%s %s", verb, c.Op)
  1681  		}
  1682  	}
  1683  	if c != nil && c.Pos.IsStmt() == src.PosIsStmt && c.Pos.SameFileAndLine(b.Pos) {
  1684  		// attempt to preserve statement marker.
  1685  		b.Pos = b.Pos.WithIsStmt()
  1686  	}
  1687  	if branch == positive || branch == negative {
  1688  		b.Kind = BlockFirst
  1689  		b.ResetControls()
  1690  		if branch == positive {
  1691  			b.swapSuccessors()
  1692  		}
  1693  	} else {
  1694  		// TODO: figure out how to remove an entry from a jump table
  1695  	}
  1696  }
  1697  
  1698  // isNonNegative reports whether v is known to be greater or equal to zero.
  1699  func isNonNegative(v *Value) bool {
  1700  	if !v.Type.IsInteger() {
  1701  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  1702  	}
  1703  	// TODO: return true if !v.Type.IsSigned()
  1704  	// SSA isn't type-safe enough to do that now (issue 37753).
  1705  	// The checks below depend only on the pattern of bits.
  1706  
  1707  	switch v.Op {
  1708  	case OpConst64:
  1709  		return v.AuxInt >= 0
  1710  
  1711  	case OpConst32:
  1712  		return int32(v.AuxInt) >= 0
  1713  
  1714  	case OpConst16:
  1715  		return int16(v.AuxInt) >= 0
  1716  
  1717  	case OpConst8:
  1718  		return int8(v.AuxInt) >= 0
  1719  
  1720  	case OpStringLen, OpSliceLen, OpSliceCap,
  1721  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  1722  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  1723  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  1724  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  1725  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  1726  		return true
  1727  
  1728  	case OpRsh64Ux64, OpRsh32Ux64:
  1729  		by := v.Args[1]
  1730  		return by.Op == OpConst64 && by.AuxInt > 0
  1731  
  1732  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  1733  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  1734  		return isNonNegative(v.Args[0])
  1735  
  1736  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  1737  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  1738  
  1739  	case OpMod64, OpMod32, OpMod16, OpMod8,
  1740  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  1741  		OpOr64, OpOr32, OpOr16, OpOr8,
  1742  		OpXor64, OpXor32, OpXor16, OpXor8:
  1743  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  1744  
  1745  		// We could handle OpPhi here, but the improvements from doing
  1746  		// so are very minor, and it is neither simple nor cheap.
  1747  	}
  1748  	return false
  1749  }
  1750  
  1751  // isConstDelta returns non-nil if v is equivalent to w+delta (signed).
  1752  func isConstDelta(v *Value) (w *Value, delta int64) {
  1753  	cop := OpConst64
  1754  	switch v.Op {
  1755  	case OpAdd32, OpSub32:
  1756  		cop = OpConst32
  1757  	case OpAdd16, OpSub16:
  1758  		cop = OpConst16
  1759  	case OpAdd8, OpSub8:
  1760  		cop = OpConst8
  1761  	}
  1762  	switch v.Op {
  1763  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  1764  		if v.Args[0].Op == cop {
  1765  			return v.Args[1], v.Args[0].AuxInt
  1766  		}
  1767  		if v.Args[1].Op == cop {
  1768  			return v.Args[0], v.Args[1].AuxInt
  1769  		}
  1770  	case OpSub64, OpSub32, OpSub16, OpSub8:
  1771  		if v.Args[1].Op == cop {
  1772  			aux := v.Args[1].AuxInt
  1773  			if aux != -aux { // Overflow; too bad
  1774  				return v.Args[0], -aux
  1775  			}
  1776  		}
  1777  	}
  1778  	return nil, 0
  1779  }
  1780  
  1781  // isCleanExt reports whether v is the result of a value-preserving
  1782  // sign or zero extension.
  1783  func isCleanExt(v *Value) bool {
  1784  	switch v.Op {
  1785  	case OpSignExt8to16, OpSignExt8to32, OpSignExt8to64,
  1786  		OpSignExt16to32, OpSignExt16to64, OpSignExt32to64:
  1787  		// signed -> signed is the only value-preserving sign extension
  1788  		return v.Args[0].Type.IsSigned() && v.Type.IsSigned()
  1789  
  1790  	case OpZeroExt8to16, OpZeroExt8to32, OpZeroExt8to64,
  1791  		OpZeroExt16to32, OpZeroExt16to64, OpZeroExt32to64:
  1792  		// unsigned -> signed/unsigned are value-preserving zero extensions
  1793  		return !v.Args[0].Type.IsSigned()
  1794  	}
  1795  	return false
  1796  }
  1797  

View as plain text