...

Source file src/cmd/compile/internal/ssa/rewrite.go

Documentation: cmd/compile/internal/ssa

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/logopt"
    10  	"cmd/compile/internal/reflectdata"
    11  	"cmd/compile/internal/types"
    12  	"cmd/internal/obj"
    13  	"cmd/internal/obj/s390x"
    14  	"cmd/internal/objabi"
    15  	"cmd/internal/src"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"internal/buildcfg"
    19  	"io"
    20  	"math"
    21  	"math/bits"
    22  	"os"
    23  	"path/filepath"
    24  	"strings"
    25  )
    26  
    27  type deadValueChoice bool
    28  
    29  const (
    30  	leaveDeadValues  deadValueChoice = false
    31  	removeDeadValues                 = true
    32  )
    33  
     34  // applyRewrite repeatedly applies rb and rv to f until no more rewrites apply; deadcode indicates whether rewrite should try to remove any values that become dead.
    35  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    36  	// repeat rewrites until we find no more rewrites
    37  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    38  	pendingLines.clear()
    39  	debug := f.pass.debug
    40  	if debug > 1 {
    41  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    42  	}
     43  	// If the number of rewrite iterations reaches itersLimit, we turn on
     44  	// cycle detection at that point. Instead of a fixed limit, size the
     45  	// limit according to func size to allow for cases such
     46  	// as the one in issue #66773.
    47  	itersLimit := f.NumBlocks()
    48  	if itersLimit < 20 {
    49  		itersLimit = 20
    50  	}
    51  	var iters int
    52  	var states map[string]bool
    53  	for {
    54  		change := false
    55  		deadChange := false
    56  		for _, b := range f.Blocks {
    57  			var b0 *Block
    58  			if debug > 1 {
    59  				b0 = new(Block)
    60  				*b0 = *b
    61  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    62  			}
    63  			for i, c := range b.ControlValues() {
    64  				for c.Op == OpCopy {
    65  					c = c.Args[0]
    66  					b.ReplaceControl(i, c)
    67  				}
    68  			}
    69  			if rb(b) {
    70  				change = true
    71  				if debug > 1 {
    72  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    73  				}
    74  			}
    75  			for j, v := range b.Values {
    76  				var v0 *Value
    77  				if debug > 1 {
    78  					v0 = new(Value)
    79  					*v0 = *v
    80  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    81  				}
    82  				if v.Uses == 0 && v.removeable() {
    83  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    84  						// Reset any values that are now unused, so that we decrement
     85  					// the use counts of all of their arguments.
    86  						// Not quite a deadcode pass, because it does not handle cycles.
    87  						// But it should help Uses==1 rules to fire.
    88  						v.reset(OpInvalid)
    89  						deadChange = true
    90  					}
    91  					// No point rewriting values which aren't used.
    92  					continue
    93  				}
    94  
    95  				vchange := phielimValue(v)
    96  				if vchange && debug > 1 {
    97  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
    98  				}
    99  
   100  				// Eliminate copy inputs.
   101  				// If any copy input becomes unused, mark it
   102  				// as invalid and discard its argument. Repeat
   103  				// recursively on the discarded argument.
   104  				// This phase helps remove phantom "dead copy" uses
    105  				// of a value so that an x.Uses==1 rule condition
   106  				// fires reliably.
   107  				for i, a := range v.Args {
   108  					if a.Op != OpCopy {
   109  						continue
   110  					}
   111  					aa := copySource(a)
   112  					v.SetArg(i, aa)
   113  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   114  					// to hold it.  The first candidate is the value that will replace a (aa),
   115  					// if it shares the same block and line and is eligible.
   116  					// The second option is v, which has a as an input.  Because aa is earlier in
   117  					// the data flow, it is the better choice.
   118  					if a.Pos.IsStmt() == src.PosIsStmt {
   119  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   120  							aa.Pos = aa.Pos.WithIsStmt()
   121  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   122  							v.Pos = v.Pos.WithIsStmt()
   123  						} else {
   124  							// Record the lost line and look for a new home after all rewrites are complete.
   125  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   126  							// line to appear in more than one block, but only one block is stored, so if both end
   127  							// up here, then one will be lost.
   128  							pendingLines.set(a.Pos, int32(a.Block.ID))
   129  						}
   130  						a.Pos = a.Pos.WithNotStmt()
   131  					}
   132  					vchange = true
   133  					for a.Uses == 0 {
   134  						b := a.Args[0]
   135  						a.reset(OpInvalid)
   136  						a = b
   137  					}
   138  				}
   139  				if vchange && debug > 1 {
   140  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   141  				}
   142  
   143  				// apply rewrite function
   144  				if rv(v) {
   145  					vchange = true
   146  					// If value changed to a poor choice for a statement boundary, move the boundary
   147  					if v.Pos.IsStmt() == src.PosIsStmt {
   148  						if k := nextGoodStatementIndex(v, j, b); k != j {
   149  							v.Pos = v.Pos.WithNotStmt()
   150  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   151  						}
   152  					}
   153  				}
   154  
   155  				change = change || vchange
   156  				if vchange && debug > 1 {
   157  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   158  				}
   159  			}
   160  		}
   161  		if !change && !deadChange {
   162  			break
   163  		}
   164  		iters++
   165  		if (iters > itersLimit || debug >= 2) && change {
   166  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   167  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   168  			// and the maximum value encountered during make.bash is 12.
   169  			// Start checking for cycles. (This is too expensive to do routinely.)
   170  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   171  			if states == nil {
   172  				states = make(map[string]bool)
   173  			}
   174  			h := f.rewriteHash()
   175  			if _, ok := states[h]; ok {
   176  				// We've found a cycle.
   177  				// To diagnose it, set debug to 2 and start again,
   178  				// so that we'll print all rules applied until we complete another cycle.
   179  				// If debug is already >= 2, we've already done that, so it's time to crash.
   180  				if debug < 2 {
   181  					debug = 2
   182  					states = make(map[string]bool)
   183  				} else {
   184  					f.Fatalf("rewrite cycle detected")
   185  				}
   186  			}
   187  			states[h] = true
   188  		}
   189  	}
   190  	// remove clobbered values
   191  	for _, b := range f.Blocks {
   192  		j := 0
   193  		for i, v := range b.Values {
   194  			vl := v.Pos
   195  			if v.Op == OpInvalid {
   196  				if v.Pos.IsStmt() == src.PosIsStmt {
   197  					pendingLines.set(vl, int32(b.ID))
   198  				}
   199  				f.freeValue(v)
   200  				continue
   201  			}
   202  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
   203  				pendingLines.remove(vl)
   204  				v.Pos = v.Pos.WithIsStmt()
   205  			}
   206  			if i != j {
   207  				b.Values[j] = v
   208  			}
   209  			j++
   210  		}
   211  		if pendingLines.get(b.Pos) == int32(b.ID) {
   212  			b.Pos = b.Pos.WithIsStmt()
   213  			pendingLines.remove(b.Pos)
   214  		}
   215  		b.truncateValues(j)
   216  	}
   217  }
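
applyRewrite is the driver that the rewrite-based passes call with their generated rewriters. A minimal sketch of that wiring follows; it is illustrative only (the real pass plumbing lives elsewhere in the package), and it assumes the Config fields carry the architecture's generated block/value rewriters.

// lowerSketch is a hypothetical pass function, assumed to live in package ssa.
func lowerSketch(f *Func) {
	// f.Config.lowerBlock / f.Config.lowerValue are assumed to hold the
	// generated rewriters for the target architecture; values made dead
	// by lowering are removed as the fixed point is reached.
	applyRewrite(f, f.Config.lowerBlock, f.Config.lowerValue, removeDeadValues)
}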
   218  
   219  // Common functions called from rewriting rules
   220  
   221  func is64BitFloat(t *types.Type) bool {
   222  	return t.Size() == 8 && t.IsFloat()
   223  }
   224  
   225  func is32BitFloat(t *types.Type) bool {
   226  	return t.Size() == 4 && t.IsFloat()
   227  }
   228  
   229  func is64BitInt(t *types.Type) bool {
   230  	return t.Size() == 8 && t.IsInteger()
   231  }
   232  
   233  func is32BitInt(t *types.Type) bool {
   234  	return t.Size() == 4 && t.IsInteger()
   235  }
   236  
   237  func is16BitInt(t *types.Type) bool {
   238  	return t.Size() == 2 && t.IsInteger()
   239  }
   240  
   241  func is8BitInt(t *types.Type) bool {
   242  	return t.Size() == 1 && t.IsInteger()
   243  }
   244  
   245  func isPtr(t *types.Type) bool {
   246  	return t.IsPtrShaped()
   247  }
   248  
   249  // mergeSym merges two symbolic offsets. There is no real merging of
    250  // offsets; we just pick the non-nil one.
   251  func mergeSym(x, y Sym) Sym {
   252  	if x == nil {
   253  		return y
   254  	}
   255  	if y == nil {
   256  		return x
   257  	}
   258  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   259  }
   260  
   261  func canMergeSym(x, y Sym) bool {
   262  	return x == nil || y == nil
   263  }
   264  
   265  // canMergeLoadClobber reports whether the load can be merged into target without
   266  // invalidating the schedule.
   267  // It also checks that the other non-load argument x is something we
   268  // are ok with clobbering.
   269  func canMergeLoadClobber(target, load, x *Value) bool {
   270  	// The register containing x is going to get clobbered.
   271  	// Don't merge if we still need the value of x.
   272  	// We don't have liveness information here, but we can
   273  	// approximate x dying with:
   274  	//  1) target is x's only use.
   275  	//  2) target is not in a deeper loop than x.
   276  	if x.Uses != 1 {
   277  		return false
   278  	}
   279  	loopnest := x.Block.Func.loopnest()
   280  	loopnest.calculateDepths()
   281  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   282  		return false
   283  	}
   284  	return canMergeLoad(target, load)
   285  }
   286  
   287  // canMergeLoad reports whether the load can be merged into target without
   288  // invalidating the schedule.
   289  func canMergeLoad(target, load *Value) bool {
   290  	if target.Block.ID != load.Block.ID {
   291  		// If the load is in a different block do not merge it.
   292  		return false
   293  	}
   294  
   295  	// We can't merge the load into the target if the load
   296  	// has more than one use.
   297  	if load.Uses != 1 {
   298  		return false
   299  	}
   300  
   301  	mem := load.MemoryArg()
   302  
   303  	// We need the load's memory arg to still be alive at target. That
   304  	// can't be the case if one of target's args depends on a memory
   305  	// state that is a successor of load's memory arg.
   306  	//
   307  	// For example, it would be invalid to merge load into target in
   308  	// the following situation because newmem has killed oldmem
   309  	// before target is reached:
   310  	//     load = read ... oldmem
   311  	//   newmem = write ... oldmem
   312  	//     arg0 = read ... newmem
   313  	//   target = add arg0 load
   314  	//
   315  	// If the argument comes from a different block then we can exclude
   316  	// it immediately because it must dominate load (which is in the
   317  	// same block as target).
   318  	var args []*Value
   319  	for _, a := range target.Args {
   320  		if a != load && a.Block.ID == target.Block.ID {
   321  			args = append(args, a)
   322  		}
   323  	}
   324  
   325  	// memPreds contains memory states known to be predecessors of load's
   326  	// memory state. It is lazily initialized.
   327  	var memPreds map[*Value]bool
   328  	for i := 0; len(args) > 0; i++ {
   329  		const limit = 100
   330  		if i >= limit {
   331  			// Give up if we have done a lot of iterations.
   332  			return false
   333  		}
   334  		v := args[len(args)-1]
   335  		args = args[:len(args)-1]
   336  		if target.Block.ID != v.Block.ID {
   337  			// Since target and load are in the same block
   338  			// we can stop searching when we leave the block.
   339  			continue
   340  		}
   341  		if v.Op == OpPhi {
   342  			// A Phi implies we have reached the top of the block.
   343  			// The memory phi, if it exists, is always
   344  			// the first logical store in the block.
   345  			continue
   346  		}
   347  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   348  			// We could handle this situation however it is likely
   349  			// to be very rare.
   350  			return false
   351  		}
   352  		if v.Op.SymEffect()&SymAddr != 0 {
   353  			// This case prevents an operation that calculates the
   354  			// address of a local variable from being forced to schedule
   355  			// before its corresponding VarDef.
   356  			// See issue 28445.
   357  			//   v1 = LOAD ...
   358  			//   v2 = VARDEF
   359  			//   v3 = LEAQ
   360  			//   v4 = CMPQ v1 v3
   361  			// We don't want to combine the CMPQ with the load, because
   362  			// that would force the CMPQ to schedule before the VARDEF, which
   363  			// in turn requires the LEAQ to schedule before the VARDEF.
   364  			return false
   365  		}
   366  		if v.Type.IsMemory() {
   367  			if memPreds == nil {
   368  				// Initialise a map containing memory states
   369  				// known to be predecessors of load's memory
   370  				// state.
   371  				memPreds = make(map[*Value]bool)
   372  				m := mem
   373  				const limit = 50
   374  				for i := 0; i < limit; i++ {
   375  					if m.Op == OpPhi {
   376  						// The memory phi, if it exists, is always
   377  						// the first logical store in the block.
   378  						break
   379  					}
   380  					if m.Block.ID != target.Block.ID {
   381  						break
   382  					}
   383  					if !m.Type.IsMemory() {
   384  						break
   385  					}
   386  					memPreds[m] = true
   387  					if len(m.Args) == 0 {
   388  						break
   389  					}
   390  					m = m.MemoryArg()
   391  				}
   392  			}
   393  
   394  			// We can merge if v is a predecessor of mem.
   395  			//
   396  			// For example, we can merge load into target in the
   397  			// following scenario:
   398  			//      x = read ... v
   399  			//    mem = write ... v
   400  			//   load = read ... mem
   401  			// target = add x load
   402  			if memPreds[v] {
   403  				continue
   404  			}
   405  			return false
   406  		}
   407  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   408  			// If v takes mem as an input then we know mem
   409  			// is valid at this point.
   410  			continue
   411  		}
   412  		for _, a := range v.Args {
   413  			if target.Block.ID == a.Block.ID {
   414  				args = append(args, a)
   415  			}
   416  		}
   417  	}
   418  
   419  	return true
   420  }
   421  
   422  // isSameCall reports whether sym is the same as the given named symbol.
   423  func isSameCall(sym interface{}, name string) bool {
   424  	fn := sym.(*AuxCall).Fn
   425  	return fn != nil && fn.String() == name
   426  }
   427  
    428  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   429  func canLoadUnaligned(c *Config) bool {
   430  	return c.ctxt.Arch.Alignment == 1
   431  }
   432  
   433  // nlzX returns the number of leading zeros.
   434  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   435  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   436  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   437  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   438  
   439  // ntzX returns the number of trailing zeros.
   440  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   441  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   442  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   443  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   444  
   445  func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
   446  func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
   447  func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
   448  func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
   449  func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
   450  
   451  // nto returns the number of trailing ones.
   452  func nto(x int64) int64 {
   453  	return int64(ntz64(^x))
   454  }
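
A few illustrative values for the bit-counting helpers above. The exampleBitCounts function is hypothetical (not part of this file) and assumes it sits in package ssa next to these helpers.

func exampleBitCounts() {
	_ = nlz64(1) // 63: only the lowest of 64 bits is set
	_ = nlz8(-1) // 0: int8(-1) is 0xff, so there are no leading zeros
	_ = ntz64(8) // 3: 0b1000 has three trailing zeros
	_ = nto(7)   // 3: 0b0111 has three trailing ones
}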
   455  
    456  // logX returns the base-2 logarithm of n.
    457  // n must be a positive power of 2 (isPowerOfTwo returns true).
   458  func log8(n int8) int64 {
   459  	return int64(bits.Len8(uint8(n))) - 1
   460  }
   461  func log16(n int16) int64 {
   462  	return int64(bits.Len16(uint16(n))) - 1
   463  }
   464  func log32(n int32) int64 {
   465  	return int64(bits.Len32(uint32(n))) - 1
   466  }
   467  func log64(n int64) int64 {
   468  	return int64(bits.Len64(uint64(n))) - 1
   469  }
   470  
    471  // log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
   472  // Rounds down.
   473  func log2uint32(n int64) int64 {
   474  	return int64(bits.Len32(uint32(n))) - 1
   475  }
   476  
    477  // isPowerOfTwo reports whether n is a power of 2.
   478  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   479  	return n > 0 && n&(n-1) == 0
   480  }
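
The relationship between the log and power-of-two helpers, as a small hypothetical sketch (assuming package ssa; the function name is made up for illustration):

func examplePow2Log() {
	_ = isPowerOfTwo(int64(64)) // true
	_ = log64(64)               // 6: 64 == 1<<6
	_ = isPowerOfTwo(int8(0))   // false: zero is not a power of two
	_ = log2uint32(0)           // -1 by convention
}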
   481  
   482  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   483  func isUint64PowerOfTwo(in int64) bool {
   484  	n := uint64(in)
   485  	return n > 0 && n&(n-1) == 0
   486  }
   487  
   488  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   489  func isUint32PowerOfTwo(in int64) bool {
   490  	n := uint64(uint32(in))
   491  	return n > 0 && n&(n-1) == 0
   492  }
   493  
   494  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   495  func is32Bit(n int64) bool {
   496  	return n == int64(int32(n))
   497  }
   498  
   499  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   500  func is16Bit(n int64) bool {
   501  	return n == int64(int16(n))
   502  }
   503  
   504  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   505  func is8Bit(n int64) bool {
   506  	return n == int64(int8(n))
   507  }
   508  
   509  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   510  func isU8Bit(n int64) bool {
   511  	return n == int64(uint8(n))
   512  }
   513  
   514  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   515  func isU12Bit(n int64) bool {
   516  	return 0 <= n && n < (1<<12)
   517  }
   518  
   519  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   520  func isU16Bit(n int64) bool {
   521  	return n == int64(uint16(n))
   522  }
   523  
   524  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   525  func isU32Bit(n int64) bool {
   526  	return n == int64(uint32(n))
   527  }
   528  
   529  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   530  func is20Bit(n int64) bool {
   531  	return -(1<<19) <= n && n < (1<<19)
   532  }
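
Boundary behavior of the range predicates above, as a hypothetical sketch (assuming package ssa, which already imports math):

func exampleRangeChecks() {
	_ = is32Bit(math.MaxInt32)     // true
	_ = is32Bit(math.MaxInt32 + 1) // false: needs 33 bits as a signed value
	_ = isU12Bit(4095)             // true: 2^12 - 1
	_ = isU12Bit(4096)             // false
	_ = is20Bit(-1 << 19)          // true: lowest representable value
	_ = is20Bit(1 << 19)           // false: one past the highest
}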
   533  
   534  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   535  func b2i(b bool) int64 {
   536  	if b {
   537  		return 1
   538  	}
   539  	return 0
   540  }
   541  
   542  // b2i32 translates a boolean value to 0 or 1.
   543  func b2i32(b bool) int32 {
   544  	if b {
   545  		return 1
   546  	}
   547  	return 0
   548  }
   549  
   550  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   551  // A shift is bounded if it is shifting by less than the width of the shifted value.
   552  func shiftIsBounded(v *Value) bool {
   553  	return v.AuxInt != 0
   554  }
   555  
   556  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   557  // generated code as much as possible.
   558  func canonLessThan(x, y *Value) bool {
   559  	if x.Op != y.Op {
   560  		return x.Op < y.Op
   561  	}
   562  	if !x.Pos.SameFileAndLine(y.Pos) {
   563  		return x.Pos.Before(y.Pos)
   564  	}
   565  	return x.ID < y.ID
   566  }
   567  
   568  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   569  // of the mantissa. It will panic if the truncation results in lost information.
   570  func truncate64Fto32F(f float64) float32 {
   571  	if !isExactFloat32(f) {
   572  		panic("truncate64Fto32F: truncation is not exact")
   573  	}
   574  	if !math.IsNaN(f) {
   575  		return float32(f)
   576  	}
   577  	// NaN bit patterns aren't necessarily preserved across conversion
   578  	// instructions so we need to do the conversion manually.
   579  	b := math.Float64bits(f)
   580  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   581  	//          | sign                  | exponent   | mantissa       |
   582  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   583  	return math.Float32frombits(r)
   584  }
   585  
   586  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   587  // pattern of the mantissa.
   588  func extend32Fto64F(f float32) float64 {
   589  	if !math.IsNaN(float64(f)) {
   590  		return float64(f)
   591  	}
   592  	// NaN bit patterns aren't necessarily preserved across conversion
   593  	// instructions so we need to do the conversion manually.
   594  	b := uint64(math.Float32bits(f))
   595  	//   | sign                  | exponent      | mantissa                    |
   596  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   597  	return math.Float64frombits(r)
   598  }
   599  
   600  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   601  func DivisionNeedsFixUp(v *Value) bool {
   602  	return v.AuxInt == 0
   603  }
   604  
   605  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   606  func auxFrom64F(f float64) int64 {
   607  	if f != f {
   608  		panic("can't encode a NaN in AuxInt field")
   609  	}
   610  	return int64(math.Float64bits(f))
   611  }
   612  
   613  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   614  func auxFrom32F(f float32) int64 {
   615  	if f != f {
   616  		panic("can't encode a NaN in AuxInt field")
   617  	}
   618  	return int64(math.Float64bits(extend32Fto64F(f)))
   619  }
   620  
   621  // auxTo32F decodes a float32 from the AuxInt value provided.
   622  func auxTo32F(i int64) float32 {
   623  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   624  }
   625  
   626  // auxTo64F decodes a float64 from the AuxInt value provided.
   627  func auxTo64F(i int64) float64 {
   628  	return math.Float64frombits(uint64(i))
   629  }
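
A round-trip sketch of the float aux encoding (the helper name is hypothetical; it assumes package ssa). NaNs are rejected by auxFrom64F/auxFrom32F, so the round trip below is exact.

func exampleAuxFloatRoundTrip() {
	a := auxFrom32F(1.5) // stores the float64 bit pattern of 1.5 in an int64
	f := auxTo32F(a)     // recovers exactly 1.5; truncate64Fto32F would panic if it were inexact
	_ = f
}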
   630  
   631  func auxIntToBool(i int64) bool {
   632  	if i == 0 {
   633  		return false
   634  	}
   635  	return true
   636  }
   637  func auxIntToInt8(i int64) int8 {
   638  	return int8(i)
   639  }
   640  func auxIntToInt16(i int64) int16 {
   641  	return int16(i)
   642  }
   643  func auxIntToInt32(i int64) int32 {
   644  	return int32(i)
   645  }
   646  func auxIntToInt64(i int64) int64 {
   647  	return i
   648  }
   649  func auxIntToUint8(i int64) uint8 {
   650  	return uint8(i)
   651  }
   652  func auxIntToFloat32(i int64) float32 {
   653  	return float32(math.Float64frombits(uint64(i)))
   654  }
   655  func auxIntToFloat64(i int64) float64 {
   656  	return math.Float64frombits(uint64(i))
   657  }
   658  func auxIntToValAndOff(i int64) ValAndOff {
   659  	return ValAndOff(i)
   660  }
   661  func auxIntToArm64BitField(i int64) arm64BitField {
   662  	return arm64BitField(i)
   663  }
   664  func auxIntToInt128(x int64) int128 {
   665  	if x != 0 {
   666  		panic("nonzero int128 not allowed")
   667  	}
   668  	return 0
   669  }
   670  func auxIntToFlagConstant(x int64) flagConstant {
   671  	return flagConstant(x)
   672  }
   673  
   674  func auxIntToOp(cc int64) Op {
   675  	return Op(cc)
   676  }
   677  
   678  func boolToAuxInt(b bool) int64 {
   679  	if b {
   680  		return 1
   681  	}
   682  	return 0
   683  }
   684  func int8ToAuxInt(i int8) int64 {
   685  	return int64(i)
   686  }
   687  func int16ToAuxInt(i int16) int64 {
   688  	return int64(i)
   689  }
   690  func int32ToAuxInt(i int32) int64 {
   691  	return int64(i)
   692  }
   693  func int64ToAuxInt(i int64) int64 {
   694  	return int64(i)
   695  }
   696  func uint8ToAuxInt(i uint8) int64 {
   697  	return int64(int8(i))
   698  }
   699  func float32ToAuxInt(f float32) int64 {
   700  	return int64(math.Float64bits(float64(f)))
   701  }
   702  func float64ToAuxInt(f float64) int64 {
   703  	return int64(math.Float64bits(f))
   704  }
   705  func valAndOffToAuxInt(v ValAndOff) int64 {
   706  	return int64(v)
   707  }
   708  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   709  	return int64(v)
   710  }
   711  func int128ToAuxInt(x int128) int64 {
   712  	if x != 0 {
   713  		panic("nonzero int128 not allowed")
   714  	}
   715  	return 0
   716  }
   717  func flagConstantToAuxInt(x flagConstant) int64 {
   718  	return int64(x)
   719  }
   720  
   721  func opToAuxInt(o Op) int64 {
   722  	return int64(o)
   723  }
   724  
   725  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   726  type Aux interface {
   727  	CanBeAnSSAAux()
   728  }
   729  
   730  // for now only used to mark moves that need to avoid clobbering flags
   731  type auxMark bool
   732  
   733  func (auxMark) CanBeAnSSAAux() {}
   734  
   735  var AuxMark auxMark
   736  
   737  // stringAux wraps string values for use in Aux.
   738  type stringAux string
   739  
   740  func (stringAux) CanBeAnSSAAux() {}
   741  
   742  func auxToString(i Aux) string {
   743  	return string(i.(stringAux))
   744  }
   745  func auxToSym(i Aux) Sym {
   746  	// TODO: kind of a hack - allows nil interface through
   747  	s, _ := i.(Sym)
   748  	return s
   749  }
   750  func auxToType(i Aux) *types.Type {
   751  	return i.(*types.Type)
   752  }
   753  func auxToCall(i Aux) *AuxCall {
   754  	return i.(*AuxCall)
   755  }
   756  func auxToS390xCCMask(i Aux) s390x.CCMask {
   757  	return i.(s390x.CCMask)
   758  }
   759  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   760  	return i.(s390x.RotateParams)
   761  }
   762  
   763  func StringToAux(s string) Aux {
   764  	return stringAux(s)
   765  }
   766  func symToAux(s Sym) Aux {
   767  	return s
   768  }
   769  func callToAux(s *AuxCall) Aux {
   770  	return s
   771  }
   772  func typeToAux(t *types.Type) Aux {
   773  	return t
   774  }
   775  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   776  	return c
   777  }
   778  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   779  	return r
   780  }
   781  
   782  // uaddOvf reports whether unsigned a+b would overflow.
   783  func uaddOvf(a, b int64) bool {
   784  	return uint64(a)+uint64(b) < uint64(a)
   785  }
   786  
   787  // loadLSymOffset simulates reading a word at an offset into a
   788  // read-only symbol's runtime memory. If it would read a pointer to
   789  // another symbol, that symbol is returned. Otherwise, it returns nil.
   790  func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
   791  	if lsym.Type != objabi.SRODATA {
   792  		return nil
   793  	}
   794  
   795  	for _, r := range lsym.R {
   796  		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
   797  			return r.Sym
   798  		}
   799  	}
   800  
   801  	return nil
   802  }
   803  
   804  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   805  	v.Op = OpStaticLECall
   806  	auxcall := v.Aux.(*AuxCall)
   807  	auxcall.Fn = sym
   808  	// Remove first arg
   809  	v.Args[0].Uses--
   810  	copy(v.Args[0:], v.Args[1:])
   811  	v.Args[len(v.Args)-1] = nil // aid GC
   812  	v.Args = v.Args[:len(v.Args)-1]
   813  	if f := v.Block.Func; f.pass.debug > 0 {
   814  		f.Warnl(v.Pos, "de-virtualizing call")
   815  	}
   816  	return v
   817  }
   818  
   819  // isSamePtr reports whether p1 and p2 point to the same address.
   820  func isSamePtr(p1, p2 *Value) bool {
   821  	if p1 == p2 {
   822  		return true
   823  	}
   824  	if p1.Op != p2.Op {
   825  		return false
   826  	}
   827  	switch p1.Op {
   828  	case OpOffPtr:
   829  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   830  	case OpAddr, OpLocalAddr:
   831  		return p1.Aux == p2.Aux
   832  	case OpAddPtr:
   833  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   834  	}
   835  	return false
   836  }
   837  
   838  func isStackPtr(v *Value) bool {
   839  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   840  		v = v.Args[0]
   841  	}
   842  	return v.Op == OpSP || v.Op == OpLocalAddr
   843  }
   844  
   845  // disjoint reports whether the memory region specified by [p1:p1+n1)
   846  // does not overlap with [p2:p2+n2).
   847  // A return value of false does not imply the regions overlap.
   848  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   849  	if n1 == 0 || n2 == 0 {
   850  		return true
   851  	}
   852  	if p1 == p2 {
   853  		return false
   854  	}
   855  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   856  		base, offset = ptr, 0
   857  		for base.Op == OpOffPtr {
   858  			offset += base.AuxInt
   859  			base = base.Args[0]
   860  		}
   861  		if opcodeTable[base.Op].nilCheck {
   862  			base = base.Args[0]
   863  		}
   864  		return base, offset
   865  	}
   866  	p1, off1 := baseAndOffset(p1)
   867  	p2, off2 := baseAndOffset(p2)
   868  	if isSamePtr(p1, p2) {
   869  		return !overlap(off1, n1, off2, n2)
   870  	}
   871  	// p1 and p2 are not the same, so if they are both OpAddrs then
   872  	// they point to different variables.
   873  	// If one pointer is on the stack and the other is an argument
   874  	// then they can't overlap.
   875  	switch p1.Op {
   876  	case OpAddr, OpLocalAddr:
   877  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   878  			return true
   879  		}
   880  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   881  	case OpArg, OpArgIntReg:
   882  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   883  			return true
   884  		}
   885  	case OpSP:
   886  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   887  	}
   888  	return false
   889  }
   890  
   891  // moveSize returns the number of bytes an aligned MOV instruction moves.
   892  func moveSize(align int64, c *Config) int64 {
   893  	switch {
   894  	case align%8 == 0 && c.PtrSize == 8:
   895  		return 8
   896  	case align%4 == 0:
   897  		return 4
   898  	case align%2 == 0:
   899  		return 2
   900  	}
   901  	return 1
   902  }
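
Illustrative results for moveSize (hypothetical sketch, assuming package ssa and a *Config c for a 64-bit target with c.PtrSize == 8):

func exampleMoveSize(c *Config) {
	_ = moveSize(8, c) // 8: alignment and pointer size both allow 8-byte moves
	_ = moveSize(6, c) // 2: divisible by 2 but not by 4
	_ = moveSize(3, c) // 1: odd alignment falls back to byte moves
}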
   903  
   904  // mergePoint finds a block among a's blocks which dominates b and is itself
   905  // dominated by all of a's blocks. Returns nil if it can't find one.
   906  // Might return nil even if one does exist.
   907  func mergePoint(b *Block, a ...*Value) *Block {
   908  	// Walk backward from b looking for one of the a's blocks.
   909  
   910  	// Max distance
   911  	d := 100
   912  
   913  	for d > 0 {
   914  		for _, x := range a {
   915  			if b == x.Block {
   916  				goto found
   917  			}
   918  		}
   919  		if len(b.Preds) > 1 {
   920  			// Don't know which way to go back. Abort.
   921  			return nil
   922  		}
   923  		b = b.Preds[0].b
   924  		d--
   925  	}
   926  	return nil // too far away
   927  found:
    928  	// At this point, b is the block of the first value in a found by walking backwards.
    929  	// If we return anything, it will be this block, saved as r.
   930  	r := b
   931  
   932  	// Keep going, counting the other a's that we find. They must all dominate r.
   933  	na := 0
   934  	for d > 0 {
   935  		for _, x := range a {
   936  			if b == x.Block {
   937  				na++
   938  			}
   939  		}
   940  		if na == len(a) {
   941  			// Found all of a in a backwards walk. We can return r.
   942  			return r
   943  		}
   944  		if len(b.Preds) > 1 {
   945  			return nil
   946  		}
   947  		b = b.Preds[0].b
   948  		d--
   949  
   950  	}
   951  	return nil // too far away
   952  }
   953  
   954  // clobber invalidates values. Returns true.
   955  // clobber is used by rewrite rules to:
   956  //
   957  //	A) make sure the values are really dead and never used again.
   958  //	B) decrement use counts of the values' args.
   959  func clobber(vv ...*Value) bool {
   960  	for _, v := range vv {
   961  		v.reset(OpInvalid)
   962  		// Note: leave v.Block intact.  The Block field is used after clobber.
   963  	}
   964  	return true
   965  }
   966  
   967  // clobberIfDead resets v when use count is 1. Returns true.
   968  // clobberIfDead is used by rewrite rules to decrement
   969  // use counts of v's args when v is dead and never used.
   970  func clobberIfDead(v *Value) bool {
   971  	if v.Uses == 1 {
   972  		v.reset(OpInvalid)
   973  	}
   974  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
   975  	return true
   976  }
   977  
   978  // noteRule is an easy way to track if a rule is matched when writing
   979  // new ones.  Make the rule of interest also conditional on
   980  //
   981  //	noteRule("note to self: rule of interest matched")
   982  //
   983  // and that message will print when the rule matches.
   984  func noteRule(s string) bool {
   985  	fmt.Println(s)
   986  	return true
   987  }
   988  
   989  // countRule increments Func.ruleMatches[key].
   990  // If Func.ruleMatches is non-nil at the end
   991  // of compilation, it will be printed to stdout.
    992  // This is intended to make it easier to find which functions
    993  // contain lots of rule matches when developing new rules.
   994  func countRule(v *Value, key string) bool {
   995  	f := v.Block.Func
   996  	if f.ruleMatches == nil {
   997  		f.ruleMatches = make(map[string]int)
   998  	}
   999  	f.ruleMatches[key]++
  1000  	return true
  1001  }
  1002  
  1003  // warnRule generates compiler debug output with string s when
  1004  // v is not in autogenerated code, cond is true and the rule has fired.
  1005  func warnRule(cond bool, v *Value, s string) bool {
  1006  	if pos := v.Pos; pos.Line() > 1 && cond {
  1007  		v.Block.Func.Warnl(pos, s)
  1008  	}
  1009  	return true
  1010  }
  1011  
  1012  // for a pseudo-op like (LessThan x), extract x.
  1013  func flagArg(v *Value) *Value {
  1014  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1015  		return nil
  1016  	}
  1017  	return v.Args[0]
  1018  }
  1019  
  1020  // arm64Negate finds the complement to an ARM64 condition code,
  1021  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1022  //
   1023  // For floating point, it's more subtle because NaN is unordered. We map
   1024  // !LessThanF -> NotLessThanF; the latter takes care of NaNs.
  1025  func arm64Negate(op Op) Op {
  1026  	switch op {
  1027  	case OpARM64LessThan:
  1028  		return OpARM64GreaterEqual
  1029  	case OpARM64LessThanU:
  1030  		return OpARM64GreaterEqualU
  1031  	case OpARM64GreaterThan:
  1032  		return OpARM64LessEqual
  1033  	case OpARM64GreaterThanU:
  1034  		return OpARM64LessEqualU
  1035  	case OpARM64LessEqual:
  1036  		return OpARM64GreaterThan
  1037  	case OpARM64LessEqualU:
  1038  		return OpARM64GreaterThanU
  1039  	case OpARM64GreaterEqual:
  1040  		return OpARM64LessThan
  1041  	case OpARM64GreaterEqualU:
  1042  		return OpARM64LessThanU
  1043  	case OpARM64Equal:
  1044  		return OpARM64NotEqual
  1045  	case OpARM64NotEqual:
  1046  		return OpARM64Equal
  1047  	case OpARM64LessThanF:
  1048  		return OpARM64NotLessThanF
  1049  	case OpARM64NotLessThanF:
  1050  		return OpARM64LessThanF
  1051  	case OpARM64LessEqualF:
  1052  		return OpARM64NotLessEqualF
  1053  	case OpARM64NotLessEqualF:
  1054  		return OpARM64LessEqualF
  1055  	case OpARM64GreaterThanF:
  1056  		return OpARM64NotGreaterThanF
  1057  	case OpARM64NotGreaterThanF:
  1058  		return OpARM64GreaterThanF
  1059  	case OpARM64GreaterEqualF:
  1060  		return OpARM64NotGreaterEqualF
  1061  	case OpARM64NotGreaterEqualF:
  1062  		return OpARM64GreaterEqualF
  1063  	default:
  1064  		panic("unreachable")
  1065  	}
  1066  }
  1067  
  1068  // arm64Invert evaluates (InvertFlags op), which
  1069  // is the same as altering the condition codes such
  1070  // that the same result would be produced if the arguments
  1071  // to the flag-generating instruction were reversed, e.g.
  1072  // (InvertFlags (CMP x y)) -> (CMP y x)
  1073  func arm64Invert(op Op) Op {
  1074  	switch op {
  1075  	case OpARM64LessThan:
  1076  		return OpARM64GreaterThan
  1077  	case OpARM64LessThanU:
  1078  		return OpARM64GreaterThanU
  1079  	case OpARM64GreaterThan:
  1080  		return OpARM64LessThan
  1081  	case OpARM64GreaterThanU:
  1082  		return OpARM64LessThanU
  1083  	case OpARM64LessEqual:
  1084  		return OpARM64GreaterEqual
  1085  	case OpARM64LessEqualU:
  1086  		return OpARM64GreaterEqualU
  1087  	case OpARM64GreaterEqual:
  1088  		return OpARM64LessEqual
  1089  	case OpARM64GreaterEqualU:
  1090  		return OpARM64LessEqualU
  1091  	case OpARM64Equal, OpARM64NotEqual:
  1092  		return op
  1093  	case OpARM64LessThanF:
  1094  		return OpARM64GreaterThanF
  1095  	case OpARM64GreaterThanF:
  1096  		return OpARM64LessThanF
  1097  	case OpARM64LessEqualF:
  1098  		return OpARM64GreaterEqualF
  1099  	case OpARM64GreaterEqualF:
  1100  		return OpARM64LessEqualF
  1101  	case OpARM64NotLessThanF:
  1102  		return OpARM64NotGreaterThanF
  1103  	case OpARM64NotGreaterThanF:
  1104  		return OpARM64NotLessThanF
  1105  	case OpARM64NotLessEqualF:
  1106  		return OpARM64NotGreaterEqualF
  1107  	case OpARM64NotGreaterEqualF:
  1108  		return OpARM64NotLessEqualF
  1109  	default:
  1110  		panic("unreachable")
  1111  	}
  1112  }
  1113  
  1114  // evaluate an ARM64 op against a flags value
  1115  // that is potentially constant; return 1 for true,
  1116  // -1 for false, and 0 for not constant.
  1117  func ccARM64Eval(op Op, flags *Value) int {
  1118  	fop := flags.Op
  1119  	if fop == OpARM64InvertFlags {
  1120  		return -ccARM64Eval(op, flags.Args[0])
  1121  	}
  1122  	if fop != OpARM64FlagConstant {
  1123  		return 0
  1124  	}
  1125  	fc := flagConstant(flags.AuxInt)
  1126  	b2i := func(b bool) int {
  1127  		if b {
  1128  			return 1
  1129  		}
  1130  		return -1
  1131  	}
  1132  	switch op {
  1133  	case OpARM64Equal:
  1134  		return b2i(fc.eq())
  1135  	case OpARM64NotEqual:
  1136  		return b2i(fc.ne())
  1137  	case OpARM64LessThan:
  1138  		return b2i(fc.lt())
  1139  	case OpARM64LessThanU:
  1140  		return b2i(fc.ult())
  1141  	case OpARM64GreaterThan:
  1142  		return b2i(fc.gt())
  1143  	case OpARM64GreaterThanU:
  1144  		return b2i(fc.ugt())
  1145  	case OpARM64LessEqual:
  1146  		return b2i(fc.le())
  1147  	case OpARM64LessEqualU:
  1148  		return b2i(fc.ule())
  1149  	case OpARM64GreaterEqual:
  1150  		return b2i(fc.ge())
  1151  	case OpARM64GreaterEqualU:
  1152  		return b2i(fc.uge())
  1153  	}
  1154  	return 0
  1155  }
  1156  
  1157  // logRule logs the use of the rule s. This will only be enabled if
  1158  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1159  func logRule(s string) {
  1160  	if ruleFile == nil {
  1161  		// Open a log file to write log to. We open in append
  1162  		// mode because all.bash runs the compiler lots of times,
  1163  		// and we want the concatenation of all of those logs.
  1164  		// This means, of course, that users need to rm the old log
  1165  		// to get fresh data.
  1166  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1167  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1168  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1169  		if err != nil {
  1170  			panic(err)
  1171  		}
  1172  		ruleFile = w
  1173  	}
  1174  	_, err := fmt.Fprintln(ruleFile, s)
  1175  	if err != nil {
  1176  		panic(err)
  1177  	}
  1178  }
  1179  
  1180  var ruleFile io.Writer
  1181  
  1182  func isConstZero(v *Value) bool {
  1183  	switch v.Op {
  1184  	case OpConstNil:
  1185  		return true
  1186  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1187  		return v.AuxInt == 0
  1188  	case OpStringMake, OpIMake, OpComplexMake:
  1189  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1190  	case OpSliceMake:
  1191  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1192  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1193  		return isConstZero(v.Args[0])
  1194  	}
  1195  	return false
  1196  }
  1197  
  1198  // reciprocalExact64 reports whether 1/c is exactly representable.
  1199  func reciprocalExact64(c float64) bool {
  1200  	b := math.Float64bits(c)
  1201  	man := b & (1<<52 - 1)
  1202  	if man != 0 {
  1203  		return false // not a power of 2, denormal, or NaN
  1204  	}
  1205  	exp := b >> 52 & (1<<11 - 1)
  1206  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1207  	// changes the exponent to 0x7fe-exp.
  1208  	switch exp {
  1209  	case 0:
  1210  		return false // ±0
  1211  	case 0x7ff:
  1212  		return false // ±inf
  1213  	case 0x7fe:
  1214  		return false // exponent is not representable
  1215  	default:
  1216  		return true
  1217  	}
  1218  }
  1219  
  1220  // reciprocalExact32 reports whether 1/c is exactly representable.
  1221  func reciprocalExact32(c float32) bool {
  1222  	b := math.Float32bits(c)
  1223  	man := b & (1<<23 - 1)
  1224  	if man != 0 {
  1225  		return false // not a power of 2, denormal, or NaN
  1226  	}
  1227  	exp := b >> 23 & (1<<8 - 1)
  1228  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1229  	// changes the exponent to 0xfe-exp.
  1230  	switch exp {
  1231  	case 0:
  1232  		return false // ±0
  1233  	case 0xff:
  1234  		return false // ±inf
  1235  	case 0xfe:
  1236  		return false // exponent is not representable
  1237  	default:
  1238  		return true
  1239  	}
  1240  }
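
A few worked values for the reciprocal checks (hypothetical sketch, assuming package ssa): the reciprocal is exact only for powers of two whose reciprocal stays a normal float.

func exampleReciprocalExact() {
	_ = reciprocalExact32(0.25) // true: 1/0.25 == 4 is exactly representable
	_ = reciprocalExact32(3.0)  // false: nonzero mantissa, 1/3 is not representable
	_ = reciprocalExact64(8.0)  // true: 1/8 == 0.125 is exactly representable
}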
  1241  
   1242  // check whether an immediate can be directly encoded into an ARM instruction.
  1243  func isARMImmRot(v uint32) bool {
  1244  	for i := 0; i < 16; i++ {
  1245  		if v&^0xff == 0 {
  1246  			return true
  1247  		}
  1248  		v = v<<2 | v>>30
  1249  	}
  1250  
  1251  	return false
  1252  }
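
Worked examples of the ARM immediate-rotation check (hypothetical sketch, assuming package ssa):

func exampleARMImmRot() {
	_ = isARMImmRot(0xff)       // true: already fits in the low 8 bits
	_ = isARMImmRot(0xff000000) // true: an even rotation brings it into the low byte
	_ = isARMImmRot(0x101)      // false: the set bits span 9 bits, too wide for any rotation
}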
  1253  
   1254  // overlap reports whether the ranges specified by the given offset and
  1255  // size pairs overlap.
  1256  func overlap(offset1, size1, offset2, size2 int64) bool {
  1257  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1258  		return true
  1259  	}
  1260  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1261  		return true
  1262  	}
  1263  	return false
  1264  }
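
The half-open interval test above in concrete terms (hypothetical sketch, assuming package ssa):

func exampleOverlap() {
	_ = overlap(0, 8, 8, 8) // false: [0,8) and [8,16) only touch at 8
	_ = overlap(0, 8, 4, 8) // true: [0,8) and [4,12) share [4,8)
	_ = overlap(4, 4, 0, 8) // true: [4,8) lies entirely inside [0,8)
}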
  1265  
   1266  // zeroUpper32Bits checks whether the value zeroes out the upper 32 bits of a 64-bit register.
   1267  // depth limits the recursion depth. In AMD64.rules, 3 is used as the limit
   1268  // because it catches the same number of cases as 4.
  1269  func zeroUpper32Bits(x *Value, depth int) bool {
  1270  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1271  		// If the value is signed, it might get re-sign-extended
  1272  		// during spill and restore. See issue 68227.
  1273  		return false
  1274  	}
  1275  	switch x.Op {
  1276  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1277  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1278  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1279  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1280  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1281  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1282  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1283  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1284  		OpAMD64SHLL, OpAMD64SHLLconst:
  1285  		return true
  1286  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1287  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1288  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1289  		return true
  1290  	case OpArg: // note: but not ArgIntReg
  1291  		// amd64 always loads args from the stack unsigned.
  1292  		// most other architectures load them sign/zero extended based on the type.
  1293  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1294  	case OpPhi, OpSelect0, OpSelect1:
   1295  		// Phis can use each other as arguments; instead of tracking visited values,
   1296  		// just limit the recursion depth.
  1297  		if depth <= 0 {
  1298  			return false
  1299  		}
  1300  		for i := range x.Args {
  1301  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1302  				return false
  1303  			}
  1304  		}
  1305  		return true
  1306  
  1307  	}
  1308  	return false
  1309  }
  1310  
  1311  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1312  func zeroUpper48Bits(x *Value, depth int) bool {
  1313  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1314  		return false
  1315  	}
  1316  	switch x.Op {
  1317  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1318  		return true
  1319  	case OpArg: // note: but not ArgIntReg
  1320  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1321  	case OpPhi, OpSelect0, OpSelect1:
   1322  		// Phis can use each other as arguments; instead of tracking visited values,
   1323  		// just limit the recursion depth.
  1324  		if depth <= 0 {
  1325  			return false
  1326  		}
  1327  		for i := range x.Args {
  1328  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1329  				return false
  1330  			}
  1331  		}
  1332  		return true
  1333  
  1334  	}
  1335  	return false
  1336  }
  1337  
  1338  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1339  func zeroUpper56Bits(x *Value, depth int) bool {
  1340  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1341  		return false
  1342  	}
  1343  	switch x.Op {
  1344  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1345  		return true
  1346  	case OpArg: // note: but not ArgIntReg
  1347  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1348  	case OpPhi, OpSelect0, OpSelect1:
   1349  		// Phis can use each other as arguments; instead of tracking visited values,
   1350  		// just limit the recursion depth.
  1351  		if depth <= 0 {
  1352  			return false
  1353  		}
  1354  		for i := range x.Args {
  1355  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1356  				return false
  1357  			}
  1358  		}
  1359  		return true
  1360  
  1361  	}
  1362  	return false
  1363  }
  1364  
  1365  func isInlinableMemclr(c *Config, sz int64) bool {
  1366  	if sz < 0 {
  1367  		return false
  1368  	}
  1369  	// TODO: expand this check to allow other architectures
  1370  	// see CL 454255 and issue 56997
  1371  	switch c.arch {
  1372  	case "amd64", "arm64":
  1373  		return true
  1374  	case "ppc64le", "ppc64", "loong64":
  1375  		return sz < 512
  1376  	}
  1377  	return false
  1378  }
  1379  
  1380  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1381  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1382  // safe, either because Move will do all of its loads before any of its stores, or
  1383  // because the arguments are known to be disjoint.
  1384  // This is used as a check for replacing memmove with Move ops.
  1385  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1386  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1387  	// Move ops may or may not be faster for large sizes depending on how the platform
  1388  	// lowers them, so we only perform this optimization on platforms that we know to
  1389  	// have fast Move ops.
  1390  	switch c.arch {
  1391  	case "amd64":
  1392  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1393  	case "386", "arm64":
  1394  		return sz <= 8
  1395  	case "s390x", "ppc64", "ppc64le":
  1396  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1397  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1398  		return sz <= 4
  1399  	}
  1400  	return false
  1401  }
  1402  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1403  	return isInlinableMemmove(dst, src, sz, c)
  1404  }
  1405  
  1406  // logLargeCopy logs the occurrence of a large copy.
  1407  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1408  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1409  func logLargeCopy(v *Value, s int64) bool {
  1410  	if s < 128 {
  1411  		return true
  1412  	}
  1413  	if logopt.Enabled() {
  1414  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1415  	}
  1416  	return true
  1417  }
  1418  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1419  	if s < 128 {
  1420  		return
  1421  	}
  1422  	if logopt.Enabled() {
  1423  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1424  	}
  1425  }
  1426  
  1427  // hasSmallRotate reports whether the architecture has rotate instructions
  1428  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1429  func hasSmallRotate(c *Config) bool {
  1430  	switch c.arch {
  1431  	case "amd64", "386":
  1432  		return true
  1433  	default:
  1434  		return false
  1435  	}
  1436  }
  1437  
  1438  func supportsPPC64PCRel() bool {
  1439  	// PCRel is currently supported for >= power10, linux only
  1440  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1441  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1442  }
  1443  
  1444  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1445  	if sh < 0 || sh >= sz {
  1446  		panic("PPC64 shift arg sh out of range")
  1447  	}
  1448  	if mb < 0 || mb >= sz {
  1449  		panic("PPC64 shift arg mb out of range")
  1450  	}
  1451  	if me < 0 || me >= sz {
  1452  		panic("PPC64 shift arg me out of range")
  1453  	}
  1454  	return int32(sh<<16 | mb<<8 | me)
  1455  }
  1456  
  1457  func GetPPC64Shiftsh(auxint int64) int64 {
  1458  	return int64(int8(auxint >> 16))
  1459  }
  1460  
  1461  func GetPPC64Shiftmb(auxint int64) int64 {
  1462  	return int64(int8(auxint >> 8))
  1463  }
  1464  
  1465  func GetPPC64Shiftme(auxint int64) int64 {
  1466  	return int64(int8(auxint))
  1467  }
  1468  
   1469  // Test if this value can be encoded as a mask for an rlwinm-like
   1470  // operation. Masks may also extend from the msb and wrap around to
   1471  // the lsb. That is, the valid masks are 32 bit strings
  1472  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1473  //
  1474  // Note: This ignores the upper 32 bits of the input. When a
   1475  // zero-extended result is desired (e.g. a 64 bit result), the
  1476  // user must verify the upper 32 bits are 0 and the mask is
  1477  // contiguous (that is, non-wrapping).
  1478  func isPPC64WordRotateMask(v64 int64) bool {
  1479  	// Isolate rightmost 1 (if none 0) and add.
  1480  	v := uint32(v64)
  1481  	vp := (v & -v) + v
  1482  	// Likewise, for the wrapping case.
  1483  	vn := ^v
  1484  	vpn := (vn & -vn) + vn
  1485  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1486  }
  1487  
  1488  // Test if this mask is a valid, contiguous bitmask which can be
  1489  // represented by a RLWNM mask and also clears the upper 32 bits
  1490  // of the register.
  1491  func isPPC64WordRotateMaskNonWrapping(v64 int64) bool {
  1492  	// Isolate rightmost 1 (if none 0) and add.
  1493  	v := uint32(v64)
  1494  	vp := (v & -v) + v
  1495  	return (v&vp == 0) && v != 0 && uint64(uint32(v64)) == uint64(v64)
  1496  }
  1497  
   1498  // Compress mask and shift into a single value of the form
  1499  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1500  // be used to regenerate the input mask.
  1501  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1502  	var mb, me, mbn, men int
  1503  
  1504  	// Determine boundaries and then decode them
  1505  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1506  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1507  	} else if nbits == 32 {
  1508  		mb = bits.LeadingZeros32(uint32(mask))
  1509  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1510  		mbn = bits.LeadingZeros32(^uint32(mask))
  1511  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1512  	} else {
  1513  		mb = bits.LeadingZeros64(uint64(mask))
  1514  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1515  		mbn = bits.LeadingZeros64(^uint64(mask))
  1516  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1517  	}
   1518  	// Check for a wrapping mask (e.g. bits at 0 and 63)
  1519  	if mb == 0 && me == int(nbits) {
  1520  		// swap the inverted values
  1521  		mb, me = men, mbn
  1522  	}
  1523  
  1524  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1525  }
  1526  
  1527  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1528  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1529  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
   1530  // operations can be combined. This function assumes the two opcodes can
  1531  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1532  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1533  	mb := s
  1534  	r := 64 - s
  1535  	// A larger mb is a smaller mask.
  1536  	if (encoded>>8)&0xFF < mb {
  1537  		encoded = (encoded &^ 0xFF00) | mb<<8
  1538  	}
  1539  	// The rotate is expected to be 0.
  1540  	if (encoded & 0xFF0000) != 0 {
  1541  		panic("non-zero rotate")
  1542  	}
  1543  	return encoded | r<<16
  1544  }
  1545  
  1546  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1547  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1548  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1549  	auxint := uint64(sauxint)
  1550  	rotate = int64((auxint >> 16) & 0xFF)
  1551  	mb = int64((auxint >> 8) & 0xFF)
  1552  	me = int64((auxint >> 0) & 0xFF)
  1553  	nbits := int64((auxint >> 24) & 0xFF)
  1554  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1555  	if mb > me {
  1556  		mask = ^mask
  1557  	}
  1558  	if nbits == 32 {
  1559  		mask = uint64(uint32(mask))
  1560  	}
  1561  
  1562  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1563  	// is inclusive.
  1564  	me = (me - 1) & (nbits - 1)
  1565  	return
  1566  }
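
A round trip through the rotate/mask encoding (hypothetical sketch, assuming package ssa), using the 32-bit mask 0x000000F0 with no rotation:

func examplePPC64RotateMaskRoundTrip() {
	enc := encodePPC64RotateMask(0, 0x000000F0, 32)
	r, mb, me, mask := DecodePPC64RotateMask(enc)
	// r == 0, mb == 24, me == 27 (inclusive, ISA bit numbering), mask == 0xF0
	_, _, _, _ = r, mb, me, mask
}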
  1567  
  1568  // This verifies that the mask is a set of
  1569  // consecutive bits including the least
  1570  // significant bit.
  1571  func isPPC64ValidShiftMask(v int64) bool {
  1572  	if (v != 0) && ((v+1)&v) == 0 {
  1573  		return true
  1574  	}
  1575  	return false
  1576  }
  1577  
  1578  func getPPC64ShiftMaskLength(v int64) int64 {
  1579  	return int64(bits.Len64(uint64(v)))
  1580  }
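
For example (hypothetical sketch, assuming package ssa):

func examplePPC64ShiftMask() {
	_ = isPPC64ValidShiftMask(0x7)   // true: 0b111 is contiguous and includes bit 0
	_ = getPPC64ShiftMaskLength(0x7) // 3
	_ = isPPC64ValidShiftMask(0x6)   // false: 0b110 does not include the least significant bit
}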
  1581  
  1582  // Decompose a shift right into an equivalent rotate/mask,
  1583  // and return mask & m.
  1584  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1585  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1586  	return m & int64(smask)
  1587  }
  1588  
  1589  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1590  func mergePPC64AndSrwi(m, s int64) int64 {
  1591  	mask := mergePPC64RShiftMask(m, s, 32)
  1592  	if !isPPC64WordRotateMask(mask) {
  1593  		return 0
  1594  	}
  1595  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1596  }
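
// Editorial sketch, not part of the original source; the function name is
// invented. It shows the combine this helper enables: (ANDconst [0xFF]
// (SRWconst [8] x)) becomes a single RLWINM that rotates left by 24 and
// keeps bits 24..31.
func exampleMergePPC64AndSrwi() {
	enc := mergePPC64AndSrwi(0xFF, 8)
	r, mb, me, mask := DecodePPC64RotateMask(enc)
	fmt.Printf("rotate=%d mb=%d me=%d mask=%#x\n", r, mb, me, mask)
	// Prints: rotate=24 mb=24 me=31 mask=0xff
}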
  1597  
  1598  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1599  func mergePPC64AndSrdi(m, s int64) int64 {
  1600  	mask := mergePPC64RShiftMask(m, s, 64)
  1601  
  1602  	// Verify the rotate and mask result only uses the lower 32 bits.
  1603  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1604  	if rv&uint64(mask) != 0 {
  1605  		return 0
  1606  	}
  1607  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1608  		return 0
  1609  	}
  1610  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1611  }
  1612  
  1613  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1614  func mergePPC64AndSldi(m, s int64) int64 {
  1615  	mask := -1 << s & m
  1616  
  1617  	// Verify the rotate and mask result only uses the lower 32 bits.
  1618  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1619  	if rv&uint64(mask) != 0 {
  1620  		return 0
  1621  	}
  1622  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1623  		return 0
  1624  	}
  1625  	return encodePPC64RotateMask(s&31, mask, 32)
  1626  }
  1627  
  1628  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1629  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1630  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1631  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1632  	// For CLRLSLDI, it is more convenient to think of it as masking the left bits, then rotating left.
  1633  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1634  
  1635  	// Rewrite mask to apply after the final left shift.
  1636  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1637  
  1638  	r_1 := 32 - srw
  1639  	r_2 := GetPPC64Shiftsh(sld)
  1640  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1641  
  1642  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1643  		return 0
  1644  	}
  1645  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1646  }
  1647  
  1648  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1649  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1650  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1651  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1652  	// For CLRLSLDI, it is more convenient to think of it as masking the left bits, then rotating left.
  1653  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1654  
  1655  	// Rewrite mask to apply after the final left shift.
  1656  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1657  
  1658  	r_1 := 64 - srd
  1659  	r_2 := GetPPC64Shiftsh(sld)
  1660  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1661  
  1662  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1663  		return 0
  1664  	}
  1665  	// This combine only works when selecting and shifting the lower 32 bits.
  1666  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1667  	if v1&mask_3 != 0 {
  1668  		return 0
  1669  	}
  1670  	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32)
  1671  }
  1672  
  1673  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1674  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1675  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1676  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1677  	// For CLRLSLDI, it is more convenient to think of it as masking the left bits, then rotating left.
  1678  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1679  
  1680  	// combine the masks, and adjust for the final left shift.
  1681  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1682  	r_2 := GetPPC64Shiftsh(int64(sld))
  1683  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1684  
  1685  	// Verify the result is still a valid bitmask of <= 32 bits.
  1686  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1687  		return 0
  1688  	}
  1689  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1690  }
  1691  
  1692  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1693  // or 0 if they cannot be merged.
  1694  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1695  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1696  	mask_out := (mask_rlw & uint64(mask))
  1697  
  1698  	// Verify the result is still a valid bitmask of <= 32 bits.
  1699  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1700  		return 0
  1701  	}
  1702  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1703  }
  1704  
  1705  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1706  // result. Return rlw if it does, 0 otherwise.
  1707  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1708  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1709  	if mb > me {
  1710  		return 0
  1711  	}
  1712  	return rlw
  1713  }
  1714  
  1715  // Test if RLWINM feeding into an AND can be merged. Return the encoded RLWINM constant,
  1716  // or 0 if they cannot be merged.
  1717  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1718  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1719  
  1720  	// Rotate the input mask, combine with the rlwinm mask, and test if it is still a valid rlwinm mask.
  1721  	r_mask := bits.RotateLeft32(mask, int(r))
  1722  
  1723  	mask_out := (mask_rlw & uint64(r_mask))
  1724  
  1725  	// Verify the result is still a valid bitmask of <= 32 bits.
  1726  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1727  		return 0
  1728  	}
  1729  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1730  }
  1731  
  1732  // Test if RLWINM feeding into SLDconst can be merged. Return the encoded RLWINM constant,
  1733  // or 0 if they cannot be merged.
  1734  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1735  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1736  	if mb > me || mb < sldi {
  1737  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1738  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1739  		return 0
  1740  	}
  1741  	// combine the masks, and adjust for the final left shift.
  1742  	mask_3 := mask_1 << sldi
  1743  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1744  
  1745  	// Verify the result is still a valid bitmask of <= 32 bits.
  1746  	if uint64(uint32(mask_3)) != mask_3 {
  1747  		return 0
  1748  	}
  1749  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1750  }
  1751  
  1752  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1753  // or return 0 if they cannot be combined.
  1754  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1755  	if sld > srw || srw >= 32 {
  1756  		return 0
  1757  	}
  1758  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1759  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1760  	mask := (mask_r & mask_l) << uint(sld)
  1761  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1762  }
  1763  
  1764  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1765  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1766  // of op.
  1767  //
  1768  // E.g. consider the case:
  1769  // a = (ADD x y)
  1770  // b = (CMPconst [0] a)
  1771  // c = (OR a z)
  1772  //
  1773  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1774  // would produce:
  1775  // a  = (ADD x y)
  1776  // a' = (ADDCC x y)
  1777  // a” = (Select0 a')
  1778  // b  = (CMPconst [0] a”)
  1779  // c  = (OR a z)
  1780  //
  1781  // which makes it impossible to rewrite the second user. Instead the result
  1782  // of this conversion is:
  1783  // a' = (ADDCC x y)
  1784  // a  = (Select0 a')
  1785  // b  = (CMPconst [0] a)
  1786  // c  = (OR a z)
  1787  //
  1788  // Which makes it trivial to rewrite b using a lowering rule.
  1789  func convertPPC64OpToOpCC(op *Value) *Value {
  1790  	ccOpMap := map[Op]Op{
  1791  		OpPPC64ADD:      OpPPC64ADDCC,
  1792  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1793  		OpPPC64AND:      OpPPC64ANDCC,
  1794  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1795  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1796  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1797  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1798  		OpPPC64NEG:      OpPPC64NEGCC,
  1799  		OpPPC64NOR:      OpPPC64NORCC,
  1800  		OpPPC64OR:       OpPPC64ORCC,
  1801  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1802  		OpPPC64SUB:      OpPPC64SUBCC,
  1803  		OpPPC64XOR:      OpPPC64XORCC,
  1804  	}
  1805  	b := op.Block
  1806  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1807  	opCC.AddArgs(op.Args...)
  1808  	op.reset(OpSelect0)
  1809  	op.AddArgs(opCC)
  1810  	return op
  1811  }
  1812  
  1813  // Try converting a RLDICL to ANDCC. If successful, return the mask, otherwise 0.
  1814  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1815  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1816  	if r != 0 || mask&0xFFFF != mask {
  1817  		return 0
  1818  	}
  1819  	return int64(mask)
  1820  }
  1821  
  1822  // Convenience function to rotate a 32 bit constant value by another constant.
  1823  func rotateLeft32(v, rotate int64) int64 {
  1824  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1825  }
  1826  
  1827  func rotateRight64(v, rotate int64) int64 {
  1828  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1829  }
  1830  
  1831  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1832  func armBFAuxInt(lsb, width int64) arm64BitField {
  1833  	if lsb < 0 || lsb > 63 {
  1834  		panic("ARM(64) bit field lsb constant out of range")
  1835  	}
  1836  	if width < 1 || lsb+width > 64 {
  1837  		panic("ARM(64) bit field width constant out of range")
  1838  	}
  1839  	return arm64BitField(width | lsb<<8)
  1840  }
  1841  
  1842  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1843  func (bfc arm64BitField) lsb() int64 {
  1844  	return int64(uint64(bfc) >> 8)
  1845  }
  1846  
  1847  // returns the width part of the auxInt field of arm64 bitfield ops.
  1848  func (bfc arm64BitField) width() int64 {
  1849  	return int64(bfc) & 0xff
  1850  }
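
// Editorial sketch, not part of the original source; the function name is
// invented. It demonstrates the auxInt packing used by the helpers above.
func exampleARM64BitField() {
	bf := armBFAuxInt(8, 16)          // lsb=8, width=16 => auxInt 16 | 8<<8
	fmt.Println(bf.lsb(), bf.width()) // 8 16
}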
  1851  
  1852  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1853  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1854  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1855  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1856  }
  1857  
  1858  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1859  func arm64BFWidth(mask, rshift int64) int64 {
  1860  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1861  	if shiftedMask == 0 {
  1862  		panic("ARM64 BF mask is zero")
  1863  	}
  1864  	return nto(shiftedMask)
  1865  }
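
// Editorial sketch, not part of the original source; the function name is
// invented. A mask qualifies when, after the right shift, it is a contiguous
// run of ones starting at bit 0 that still fits below bit 64 once placed at lsb.
func exampleARM64BFMask() {
	fmt.Println(isARM64BFMask(8, 0xFF0, 4)) // true: 0xFF0>>4 = 0xFF, width 8, 8+8 < 64
	fmt.Println(arm64BFWidth(0xFF0, 4))     // 8
	fmt.Println(isARM64BFMask(8, 0xF0F, 0)) // false: 0xF0F is not contiguous
}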
  1866  
  1867  // registerizable reports whether typ is a primitive type that fits in
  1868  // a register. It assumes float64 values will always fit into registers
  1869  // even if that isn't strictly true.
  1870  func registerizable(b *Block, typ *types.Type) bool {
  1871  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1872  		return true
  1873  	}
  1874  	if typ.IsInteger() {
  1875  		return typ.Size() <= b.Func.Config.RegSize
  1876  	}
  1877  	return false
  1878  }
  1879  
  1880  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  1881  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1882  	f := v.Block.Func
  1883  	if !f.Config.Race {
  1884  		return false
  1885  	}
  1886  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  1887  		return false
  1888  	}
  1889  	for _, b := range f.Blocks {
  1890  		for _, v := range b.Values {
  1891  			switch v.Op {
  1892  			case OpStaticCall, OpStaticLECall:
  1893  				// The scan for racefuncenter will also encounter racefuncexit and vice versa;
  1894  				// allow those calls, as well as calls to panic*.
  1895  				s := v.Aux.(*AuxCall).Fn.String()
  1896  				switch s {
  1897  				case "runtime.racefuncenter", "runtime.racefuncexit",
  1898  					"runtime.panicdivide", "runtime.panicwrap",
  1899  					"runtime.panicshift":
  1900  					continue
  1901  				}
  1902  				// If we encountered any call, we need to keep racefunc*,
  1903  				// for accurate stacktraces.
  1904  				return false
  1905  			case OpPanicBounds, OpPanicExtend:
  1906  				// Note: these are panic generators that are ok (like the static calls above).
  1907  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  1908  				// We must keep the race functions if there are any other call types.
  1909  				return false
  1910  			}
  1911  		}
  1912  	}
  1913  	if isSameCall(sym, "runtime.racefuncenter") {
  1914  		// TODO REGISTER ABI this needs to be cleaned up.
  1915  		// If we're removing racefuncenter, remove its argument as well.
  1916  		if v.Args[0].Op != OpStore {
  1917  			if v.Op == OpStaticLECall {
  1918  				// there is no store, yet.
  1919  				return true
  1920  			}
  1921  			return false
  1922  		}
  1923  		mem := v.Args[0].Args[2]
  1924  		v.Args[0].reset(OpCopy)
  1925  		v.Args[0].AddArg(mem)
  1926  	}
  1927  	return true
  1928  }
  1929  
  1930  // symIsRO reports whether sym is a read-only global.
  1931  func symIsRO(sym interface{}) bool {
  1932  	lsym := sym.(*obj.LSym)
  1933  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  1934  }
  1935  
  1936  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  1937  func symIsROZero(sym Sym) bool {
  1938  	lsym := sym.(*obj.LSym)
  1939  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  1940  		return false
  1941  	}
  1942  	for _, b := range lsym.P {
  1943  		if b != 0 {
  1944  			return false
  1945  		}
  1946  	}
  1947  	return true
  1948  }
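
// Editorial sketch, not part of the original source; the function name is
// invented. A read-only symbol with no relocations and all-zero data
// satisfies both predicates above.
func exampleSymIsRO() {
	lsym := &obj.LSym{Type: objabi.SRODATA, P: []byte{0, 0, 0, 0}}
	fmt.Println(symIsRO(lsym), symIsROZero(lsym)) // true true
}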
  1949  
  1950  // isFixed32 returns true if the int32 at offset off in symbol sym
  1951  // is known and constant.
  1952  func isFixed32(c *Config, sym Sym, off int64) bool {
  1953  	return isFixed(c, sym, off, 4)
  1954  }
  1955  
  1956  // isFixed returns true if the range [off,off+size] of the symbol sym
  1957  // is known and constant.
  1958  func isFixed(c *Config, sym Sym, off, size int64) bool {
  1959  	lsym := sym.(*obj.LSym)
  1960  	if lsym.Extra == nil {
  1961  		return false
  1962  	}
  1963  	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  1964  		if off == 2*c.PtrSize && size == 4 {
  1965  			return true // type hash field
  1966  		}
  1967  	}
  1968  	return false
  1969  }
  1970  func fixed32(c *Config, sym Sym, off int64) int32 {
  1971  	lsym := sym.(*obj.LSym)
  1972  	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  1973  		if off == 2*c.PtrSize {
  1974  			return int32(types.TypeHash(ti.Type.(*types.Type)))
  1975  		}
  1976  	}
  1977  	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
  1978  	return 0
  1979  }
  1980  
  1981  // isFixedSym returns true if the contents of sym at the given offset
  1982  // are known and are the constant address of another symbol.
  1983  func isFixedSym(sym Sym, off int64) bool {
  1984  	lsym := sym.(*obj.LSym)
  1985  	switch {
  1986  	case lsym.Type == objabi.SRODATA:
  1987  		// itabs, dictionaries
  1988  	default:
  1989  		return false
  1990  	}
  1991  	for _, r := range lsym.R {
  1992  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  1993  			return true
  1994  		}
  1995  	}
  1996  	return false
  1997  }
  1998  func fixedSym(f *Func, sym Sym, off int64) Sym {
  1999  	lsym := sym.(*obj.LSym)
  2000  	for _, r := range lsym.R {
  2001  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
  2002  			if strings.HasPrefix(r.Sym.Name, "type:") {
  2003  				// In case we're loading a type out of a dictionary, we need to record
  2004  				// that the containing function might put that type in an interface.
  2005  				// That information is currently recorded in relocations in the dictionary,
  2006  				// but if we perform this load at compile time then the dictionary
  2007  				// might be dead.
  2008  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2009  			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2010  				// Same, but if we're using an itab we need to record that the
  2011  				// itab._type might be put in an interface.
  2012  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2013  			}
  2014  			return r.Sym
  2015  		}
  2016  	}
  2017  	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
  2018  	return nil
  2019  }
  2020  
  2021  // read8 reads one byte from the read-only global sym at offset off.
  2022  func read8(sym interface{}, off int64) uint8 {
  2023  	lsym := sym.(*obj.LSym)
  2024  	if off >= int64(len(lsym.P)) || off < 0 {
  2025  		// Invalid index into the global sym.
  2026  		// This can happen in dead code, so we don't want to panic.
  2027  		// Just return any value, it will eventually get ignored.
  2028  		// See issue 29215.
  2029  		return 0
  2030  	}
  2031  	return lsym.P[off]
  2032  }
  2033  
  2034  // read16 reads two bytes from the read-only global sym at offset off.
  2035  func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
  2036  	lsym := sym.(*obj.LSym)
  2037  	// lsym.P is written lazily.
  2038  	// Bytes requested after the end of lsym.P are 0.
  2039  	var src []byte
  2040  	if 0 <= off && off < int64(len(lsym.P)) {
  2041  		src = lsym.P[off:]
  2042  	}
  2043  	buf := make([]byte, 2)
  2044  	copy(buf, src)
  2045  	return byteorder.Uint16(buf)
  2046  }
  2047  
  2048  // read32 reads four bytes from the read-only global sym at offset off.
  2049  func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
  2050  	lsym := sym.(*obj.LSym)
  2051  	var src []byte
  2052  	if 0 <= off && off < int64(len(lsym.P)) {
  2053  		src = lsym.P[off:]
  2054  	}
  2055  	buf := make([]byte, 4)
  2056  	copy(buf, src)
  2057  	return byteorder.Uint32(buf)
  2058  }
  2059  
  2060  // read64 reads eight bytes from the read-only global sym at offset off.
  2061  func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
  2062  	lsym := sym.(*obj.LSym)
  2063  	var src []byte
  2064  	if 0 <= off && off < int64(len(lsym.P)) {
  2065  		src = lsym.P[off:]
  2066  	}
  2067  	buf := make([]byte, 8)
  2068  	copy(buf, src)
  2069  	return byteorder.Uint64(buf)
  2070  }
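
// Editorial sketch, not part of the original source; the function name and
// the symbol contents are made up. Reads past the end of lsym.P come back as
// zero bytes, matching the comment on read16 above.
func exampleReadROSym() {
	lsym := &obj.LSym{P: []byte{0x01, 0x02, 0x03, 0x04}}
	fmt.Printf("%#x\n", read16(lsym, 0, binary.LittleEndian)) // 0x201
	fmt.Printf("%#x\n", read32(lsym, 2, binary.LittleEndian)) // 0x403: bytes 4 and 5 read as 0
}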
  2071  
  2072  // sequentialAddresses reports true if it can prove that x + n == y
  2073  func sequentialAddresses(x, y *Value, n int64) bool {
  2074  	if x == y && n == 0 {
  2075  		return true
  2076  	}
  2077  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2078  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2079  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2080  		return true
  2081  	}
  2082  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2083  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2084  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2085  		return true
  2086  	}
  2087  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2088  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2089  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2090  		return true
  2091  	}
  2092  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2093  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2094  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2095  		return true
  2096  	}
  2097  	return false
  2098  }
  2099  
  2100  // flagConstant represents the result of a compile-time comparison.
  2101  // The sense of these flags does not necessarily represent the hardware's notion
  2102  // of a flags register - these are just a compile-time construct.
  2103  // We happen to match the semantics to those of arm/arm64.
  2104  // Note that these semantics differ from x86: the carry flag has the opposite
  2105  // sense on a subtraction!
  2106  //
  2107  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2108  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2109  //	 (because it does x + ^y + C).
  2110  //
  2111  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2112  type flagConstant uint8
  2113  
  2114  // N reports whether the result of an operation is negative (high bit set).
  2115  func (fc flagConstant) N() bool {
  2116  	return fc&1 != 0
  2117  }
  2118  
  2119  // Z reports whether the result of an operation is 0.
  2120  func (fc flagConstant) Z() bool {
  2121  	return fc&2 != 0
  2122  }
  2123  
  2124  // C reports whether an unsigned add overflowed (carry), or an
  2125  // unsigned subtract did not underflow (borrow).
  2126  func (fc flagConstant) C() bool {
  2127  	return fc&4 != 0
  2128  }
  2129  
  2130  // V reports whether a signed operation overflowed or underflowed.
  2131  func (fc flagConstant) V() bool {
  2132  	return fc&8 != 0
  2133  }
  2134  
  2135  func (fc flagConstant) eq() bool {
  2136  	return fc.Z()
  2137  }
  2138  func (fc flagConstant) ne() bool {
  2139  	return !fc.Z()
  2140  }
  2141  func (fc flagConstant) lt() bool {
  2142  	return fc.N() != fc.V()
  2143  }
  2144  func (fc flagConstant) le() bool {
  2145  	return fc.Z() || fc.lt()
  2146  }
  2147  func (fc flagConstant) gt() bool {
  2148  	return !fc.Z() && fc.ge()
  2149  }
  2150  func (fc flagConstant) ge() bool {
  2151  	return fc.N() == fc.V()
  2152  }
  2153  func (fc flagConstant) ult() bool {
  2154  	return !fc.C()
  2155  }
  2156  func (fc flagConstant) ule() bool {
  2157  	return fc.Z() || fc.ult()
  2158  }
  2159  func (fc flagConstant) ugt() bool {
  2160  	return !fc.Z() && fc.uge()
  2161  }
  2162  func (fc flagConstant) uge() bool {
  2163  	return fc.C()
  2164  }
  2165  
  2166  func (fc flagConstant) ltNoov() bool {
  2167  	return fc.lt() && !fc.V()
  2168  }
  2169  func (fc flagConstant) leNoov() bool {
  2170  	return fc.le() && !fc.V()
  2171  }
  2172  func (fc flagConstant) gtNoov() bool {
  2173  	return fc.gt() && !fc.V()
  2174  }
  2175  func (fc flagConstant) geNoov() bool {
  2176  	return fc.ge() && !fc.V()
  2177  }
  2178  
  2179  func (fc flagConstant) String() string {
  2180  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2181  }
  2182  
  2183  type flagConstantBuilder struct {
  2184  	N bool
  2185  	Z bool
  2186  	C bool
  2187  	V bool
  2188  }
  2189  
  2190  func (fcs flagConstantBuilder) encode() flagConstant {
  2191  	var fc flagConstant
  2192  	if fcs.N {
  2193  		fc |= 1
  2194  	}
  2195  	if fcs.Z {
  2196  		fc |= 2
  2197  	}
  2198  	if fcs.C {
  2199  		fc |= 4
  2200  	}
  2201  	if fcs.V {
  2202  		fc |= 8
  2203  	}
  2204  	return fc
  2205  }
  2206  
  2207  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2208  //  - the results of the C flag are different
  2209  //  - the results of the V flag when y==minint are different
  2210  
  2211  // addFlags64 returns the flags that would be set from computing x+y.
  2212  func addFlags64(x, y int64) flagConstant {
  2213  	var fcb flagConstantBuilder
  2214  	fcb.Z = x+y == 0
  2215  	fcb.N = x+y < 0
  2216  	fcb.C = uint64(x+y) < uint64(x)
  2217  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2218  	return fcb.encode()
  2219  }
  2220  
  2221  // subFlags64 returns the flags that would be set from computing x-y.
  2222  func subFlags64(x, y int64) flagConstant {
  2223  	var fcb flagConstantBuilder
  2224  	fcb.Z = x-y == 0
  2225  	fcb.N = x-y < 0
  2226  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2227  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2228  	return fcb.encode()
  2229  }
  2230  
  2231  // addFlags32 returns the flags that would be set from computing x+y.
  2232  func addFlags32(x, y int32) flagConstant {
  2233  	var fcb flagConstantBuilder
  2234  	fcb.Z = x+y == 0
  2235  	fcb.N = x+y < 0
  2236  	fcb.C = uint32(x+y) < uint32(x)
  2237  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2238  	return fcb.encode()
  2239  }
  2240  
  2241  // subFlags32 returns the flags that would be set from computing x-y.
  2242  func subFlags32(x, y int32) flagConstant {
  2243  	var fcb flagConstantBuilder
  2244  	fcb.Z = x-y == 0
  2245  	fcb.N = x-y < 0
  2246  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2247  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2248  	return fcb.encode()
  2249  }
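
// Editorial sketch, not part of the original source; the function name is
// invented. It evaluates a compile-time comparison and queries the arm-style
// predicates defined above, and shows one case from the note preceding
// addFlags64 where addFlags and subFlags disagree on C.
func exampleFlagConstant() {
	fc := subFlags64(1, 2)
	fmt.Println(fc.lt(), fc.ult(), fc.eq()) // true true false
	fmt.Println(subFlags64(0, 0).C())       // true: no borrow in the arm model
	fmt.Println(addFlags64(0, 0).C())       // false: no carry out of 0+0
}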
  2250  
  2251  // logicFlags64 returns flags set to the sign/zeroness of x.
  2252  // C and V are set to false.
  2253  func logicFlags64(x int64) flagConstant {
  2254  	var fcb flagConstantBuilder
  2255  	fcb.Z = x == 0
  2256  	fcb.N = x < 0
  2257  	return fcb.encode()
  2258  }
  2259  
  2260  // logicFlags32 returns flags set to the sign/zeroness of x.
  2261  // C and V are set to false.
  2262  func logicFlags32(x int32) flagConstant {
  2263  	var fcb flagConstantBuilder
  2264  	fcb.Z = x == 0
  2265  	fcb.N = x < 0
  2266  	return fcb.encode()
  2267  }
  2268  
  2269  func makeJumpTableSym(b *Block) *obj.LSym {
  2270  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2271  	// The jump table symbol is accessed only from the function symbol.
  2272  	s.Set(obj.AttrStatic, true)
  2273  	return s
  2274  }
  2275  
  2276  // canRotate reports whether the architecture supports
  2277  // rotates of integer registers with the given number of bits.
  2278  func canRotate(c *Config, bits int64) bool {
  2279  	if bits > c.PtrSize*8 {
  2280  		// Don't rewrite to rotates bigger than the machine word.
  2281  		return false
  2282  	}
  2283  	switch c.arch {
  2284  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2285  		return true
  2286  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2287  		return bits >= 32
  2288  	default:
  2289  		return false
  2290  	}
  2291  }
  2292  
  2293  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2294  func isARM64bitcon(x uint64) bool {
  2295  	if x == 1<<64-1 || x == 0 {
  2296  		return false
  2297  	}
  2298  	// determine the period and sign-extend a unit to 64 bits
  2299  	switch {
  2300  	case x != x>>32|x<<32:
  2301  		// period is 64
  2302  		// nothing to do
  2303  	case x != x>>16|x<<48:
  2304  		// period is 32
  2305  		x = uint64(int64(int32(x)))
  2306  	case x != x>>8|x<<56:
  2307  		// period is 16
  2308  		x = uint64(int64(int16(x)))
  2309  	case x != x>>4|x<<60:
  2310  		// period is 8
  2311  		x = uint64(int64(int8(x)))
  2312  	default:
  2313  		// period is 4 or 2, always true
  2314  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2315  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2316  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2317  		// 0101, 1010             -- 01   rotate, repeat
  2318  		return true
  2319  	}
  2320  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2321  }
  2322  
  2323  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2324  func sequenceOfOnes(x uint64) bool {
  2325  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2326  	y += x
  2327  	return (y-1)&y == 0
  2328  }
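
// Editorial sketch, not part of the original source; the function name is
// invented. ARM64 logical immediates are rotated, repeating runs of ones, so
// a repeated 0x0000FFFF pattern is encodable while an arbitrary constant is not.
func exampleIsARM64bitcon() {
	fmt.Println(isARM64bitcon(0x0000FFFF0000FFFF)) // true: a 16-bit run of ones repeating with period 32
	fmt.Println(isARM64bitcon(0x1234))             // false: neither it nor its complement is a single run
}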
  2329  
  2330  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2331  func isARM64addcon(v int64) bool {
  2332  	/* uimm12 or uimm24? */
  2333  	if v < 0 {
  2334  		return false
  2335  	}
  2336  	if (v & 0xFFF) == 0 {
  2337  		v >>= 12
  2338  	}
  2339  	return v <= 0xFFF
  2340  }
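
// Editorial sketch, not part of the original source; the function name is
// invented. ADD/SUB immediates are a 12-bit value, optionally shifted left by 12.
func exampleIsARM64addcon() {
	fmt.Println(isARM64addcon(0xFFF))    // true: plain uimm12
	fmt.Println(isARM64addcon(0xABC000)) // true: uimm12 shifted left by 12
	fmt.Println(isARM64addcon(0x1001))   // false: needs 13 significant bits
}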
  2341  
  2342  // setPos sets the position of v to pos, then returns true.
  2343  // Useful for setting the position of a rewrite's result to
  2344  // something other than the default.
  2345  func setPos(v *Value, pos src.XPos) bool {
  2346  	v.Pos = pos
  2347  	return true
  2348  }
  2349  
  2350  // isNonNegative reports whether v is known to be greater or equal to zero.
  2351  // Note that this is pretty simplistic. The prove pass generates more detailed
  2352  // nonnegative information about values.
  2353  func isNonNegative(v *Value) bool {
  2354  	if !v.Type.IsInteger() {
  2355  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2356  	}
  2357  	// TODO: return true if !v.Type.IsSigned()
  2358  	// SSA isn't type-safe enough to do that now (issue 37753).
  2359  	// The checks below depend only on the pattern of bits.
  2360  
  2361  	switch v.Op {
  2362  	case OpConst64:
  2363  		return v.AuxInt >= 0
  2364  
  2365  	case OpConst32:
  2366  		return int32(v.AuxInt) >= 0
  2367  
  2368  	case OpConst16:
  2369  		return int16(v.AuxInt) >= 0
  2370  
  2371  	case OpConst8:
  2372  		return int8(v.AuxInt) >= 0
  2373  
  2374  	case OpStringLen, OpSliceLen, OpSliceCap,
  2375  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2376  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2377  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2378  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2379  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2380  		return true
  2381  
  2382  	case OpRsh64Ux64, OpRsh32Ux64:
  2383  		by := v.Args[1]
  2384  		return by.Op == OpConst64 && by.AuxInt > 0
  2385  
  2386  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2387  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2388  		return isNonNegative(v.Args[0])
  2389  
  2390  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2391  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2392  
  2393  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2394  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2395  		OpOr64, OpOr32, OpOr16, OpOr8,
  2396  		OpXor64, OpXor32, OpXor16, OpXor8:
  2397  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2398  
  2399  		// We could handle OpPhi here, but the improvements from doing
  2400  		// so are very minor, and it is neither simple nor cheap.
  2401  	}
  2402  	return false
  2403  }
  2404  
  2405  func rewriteStructLoad(v *Value) *Value {
  2406  	b := v.Block
  2407  	ptr := v.Args[0]
  2408  	mem := v.Args[1]
  2409  
  2410  	t := v.Type
  2411  	args := make([]*Value, t.NumFields())
  2412  	for i := range args {
  2413  		ft := t.FieldType(i)
  2414  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2415  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2416  	}
  2417  
  2418  	v.reset(OpStructMake)
  2419  	v.AddArgs(args...)
  2420  	return v
  2421  }
  2422  
  2423  func rewriteStructStore(v *Value) *Value {
  2424  	b := v.Block
  2425  	dst := v.Args[0]
  2426  	x := v.Args[1]
  2427  	if x.Op != OpStructMake {
  2428  		base.Fatalf("invalid struct store: %v", x)
  2429  	}
  2430  	mem := v.Args[2]
  2431  
  2432  	t := x.Type
  2433  	for i, arg := range x.Args {
  2434  		ft := t.FieldType(i)
  2435  
  2436  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2437  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2438  	}
  2439  
  2440  	return mem
  2441  }
  2442  
