...

Source file src/cmd/compile/internal/ssa/regalloc.go

Documentation: cmd/compile/internal/ssa

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Register allocation.
     6  //
     7  // We use a version of a linear scan register allocator. We treat the
     8  // whole function as a single long basic block and run through
     9  // it using a greedy register allocator. Then all merge edges
    10  // (those targeting a block with len(Preds)>1) are processed to
    11  // shuffle data into the place that the target of the edge expects.
    12  //
    13  // The greedy allocator moves values into registers just before they
    14  // are used, spills registers only when necessary, and spills the
    15  // value whose next use is farthest in the future.
    16  //
    17  // The register allocator requires that a block is not scheduled until
    18  // at least one of its predecessors has been scheduled. The most recent
    19  // such predecessor provides the starting register state for a block.
    20  //
    21  // It also requires that there are no critical edges (critical =
    22  // comes from a block with >1 successor and goes to a block with >1
    23  // predecessor).  This makes it easy to add fixup code on merge edges -
    24  // the source of a merge edge has only one successor, so we can add
    25  // fixup code to the end of that block.
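        //
        // For example (illustrative): if a merge edge expects x in AX but the
        // edge's source block ends with x in BX, the shuffle appends a BX->AX
        // copy (or a load from x's spill slot) at the end of that source block.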
    26  
    27  // Spilling
    28  //
    29  // During the normal course of the allocator, we might throw a still-live
    30  // value out of all registers. When that value is subsequently used, we must
    31  // load it from a slot on the stack. We must also issue an instruction to
    32  // initialize that stack location with a copy of the value (v in the example below).
    33  //
    34  // pre-regalloc:
    35  //   (1) v = Op ...
    36  //   (2) x = Op ...
    37  //   (3) ... = Op v ...
    38  //
    39  // post-regalloc:
    40  //   (1) v = Op ...    : AX // computes v, store result in AX
    41  //       s = StoreReg v     // spill v to a stack slot
    42  //   (2) x = Op ...    : AX // some other op uses AX
    43  //       c = LoadReg s : CX // restore v from stack slot
    44  //   (3) ... = Op c ...     // use the restored value
    45  //
    46  // Allocation occurs normally until we reach (3) and we realize we have
    47  // a use of v and it isn't in any register. At that point, we allocate
    48  // a spill (a StoreReg) for v. We can't determine the correct place for
    49  // the spill at this point, so we allocate the spill as blockless initially.
    50  // The restore is then generated to load v back into a register so it can
    51  // be used. Subsequent uses of v will use the restored value c instead.
    52  //
    53  // What remains is the question of where to schedule the spill.
    54  // During allocation, we keep track of the dominator of all restores of v.
    55  // The spill of v must dominate that block. The spill must also be issued at
    56  // a point where v is still in a register.
    57  //
    58  // To find the right place, start at b, the block which dominates all restores.
    59  //  - If b is v.Block, then issue the spill right after v.
    60  //    It is known to be in a register at that point, and dominates any restores.
    61  //  - Otherwise, if v is in a register at the start of b,
    62  //    put the spill of v at the start of b.
    63  //  - Otherwise, set b = immediate dominator of b, and repeat.
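        //
        // A sketch of that walk (illustrative pseudocode, not the actual code;
        // idom(b) stands in for an immediate-dominator lookup):
        //
        //	for {
        //		if b == v.Block {
        //			break // issue the spill right after v
        //		}
        //		if v is in a register at the start of b {
        //			break // issue the spill at the start of b
        //		}
        //		b = idom(b)
        //	}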
    64  //
    65  // Phi values are special, as always. We define two kinds of phis, those
    66  // where the merge happens in a register (a "register" phi) and those where
    67  // the merge happens in a stack location (a "stack" phi).
    68  //
    69  // A register phi must have the phi and all of its inputs allocated to the
    70  // same register. Register phis are spilled similarly to regular ops.
    71  //
    72  // A stack phi must have the phi and all of its inputs allocated to the same
    73  // stack location. Stack phis start out life already spilled - each phi
    74  // input must be a store (using StoreReg) at the end of the corresponding
    75  // predecessor block.
    76  //     b1: y = ... : AX        b2: z = ... : BX
    77  //         y2 = StoreReg y         z2 = StoreReg z
    78  //         goto b3                 goto b3
    79  //     b3: x = phi(y2, z2)
    80  // The stack allocator knows that StoreReg args of stack-allocated phis
    81  // must be allocated to the same stack slot as the phi that uses them.
    82  // x is now a spilled value and a restore must appear before its first use.
    83  
    84  // TODO
    85  
    86  // Use an affinity graph to mark two values which should use the
    87  // same register. This affinity graph will be used to prefer certain
    88  // registers for allocation. This affinity helps eliminate moves that
    89  // are required for phi implementations and helps generate allocations
    90  // for 2-register architectures.
    91  
    92  // Note: regalloc generates a not-quite-SSA output. If we have:
    93  //
    94  //             b1: x = ... : AX
    95  //                 x2 = StoreReg x
    96  //                 ... AX gets reused for something else ...
    97  //                 if ... goto b3 else b4
    98  //
    99  //   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
   100  //       ... use x3 ...                 ... use x4 ...
   101  //
   102  //             b2: ... use x3 ...
   103  //
   104  // If b3 is the primary predecessor of b2, then we use x3 in b2 and
   105  // add a x4:CX->BX copy at the end of b4.
   106  // But the definition of x3 doesn't dominate b2.  We should really
   107  // insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep
   108  // SSA form. For now, we ignore this problem as remaining in strict
   109  // SSA form isn't needed after regalloc. We'll just leave the use
   110  // of x3 not dominated by the definition of x3, and the CX->BX copy
   111  // will have no use (so don't run deadcode after regalloc!).
   112  // TODO: maybe we should introduce these extra phis?
   113  
   114  package ssa
   115  
   116  import (
   117  	"cmd/compile/internal/base"
   118  	"cmd/compile/internal/ir"
   119  	"cmd/compile/internal/types"
   120  	"cmd/internal/src"
   121  	"cmd/internal/sys"
   122  	"fmt"
   123  	"internal/buildcfg"
   124  	"math/bits"
   125  	"unsafe"
   126  )
   127  
   128  const (
   129  	moveSpills = iota
   130  	logSpills
   131  	regDebug
   132  	stackDebug
   133  )
   134  
   135  // distance is a measure of how far into the future values are used.
   136  // distance is measured in units of instructions.
   137  const (
   138  	likelyDistance   = 1
   139  	normalDistance   = 10
   140  	unlikelyDistance = 100
   141  )
   142  
   143  // regalloc performs register allocation on f. It sets f.RegAlloc
   144  // to the resulting allocation.
   145  func regalloc(f *Func) {
   146  	var s regAllocState
   147  	s.init(f)
   148  	s.regalloc(f)
   149  	s.close()
   150  }
   151  
   152  type register uint8
   153  
   154  const noRegister register = 255
   155  
   156  // For bulk initializing
   157  var noRegisters [32]register = [32]register{
   158  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   159  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   160  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   161  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   162  }
   163  
   164  // A regMask encodes a set of machine registers.
   165  // TODO: regMask -> regSet?
   166  type regMask uint64
   167  
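        // String returns a space-separated list of the registers in m; for
        // example, regMask(0b101).String() == "r0 r2".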
   168  func (m regMask) String() string {
   169  	s := ""
   170  	for r := register(0); m != 0; r++ {
   171  		if m>>r&1 == 0 {
   172  			continue
   173  		}
   174  		m &^= regMask(1) << r
   175  		if s != "" {
   176  			s += " "
   177  		}
   178  		s += fmt.Sprintf("r%d", r)
   179  	}
   180  	return s
   181  }
   182  
   183  func (s *regAllocState) RegMaskString(m regMask) string {
   184  	str := ""
   185  	for r := register(0); m != 0; r++ {
   186  		if m>>r&1 == 0 {
   187  			continue
   188  		}
   189  		m &^= regMask(1) << r
   190  		if str != "" {
   191  			str += " "
   192  		}
   193  		str += s.registers[r].String()
   194  	}
   195  	return str
   196  }
   197  
   198  // countRegs returns the number of set bits in the register mask.
   199  func countRegs(r regMask) int {
   200  	return bits.OnesCount64(uint64(r))
   201  }
   202  
   203  // pickReg picks an arbitrary register from the register mask.
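        // It returns the lowest-numbered register in the mask; for example,
        // pickReg(0b1010) == register(1).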
   204  func pickReg(r regMask) register {
   205  	if r == 0 {
   206  		panic("can't pick a register from an empty set")
   207  	}
   208  	// pick the lowest one
   209  	return register(bits.TrailingZeros64(uint64(r)))
   210  }
   211  
   212  type use struct {
   213  	dist int32    // distance from start of the block to a use of a value
   214  	pos  src.XPos // source position of the use
   215  	next *use     // linked list of uses of a value in nondecreasing dist order
   216  }
   217  
   218  // A valState records the register allocation state for a (pre-regalloc) value.
   219  type valState struct {
   220  	regs              regMask // the set of registers holding a Value (usually just one)
   221  	uses              *use    // list of uses in this block
   222  	spill             *Value  // spilled copy of the Value (if any)
   223  	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
   224  	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
   225  	needReg           bool    // cached value of v.needRegister()
   226  	rematerializeable bool    // cached value of v.rematerializeable()
   227  }
   228  
   229  type regState struct {
   230  	v *Value // Original (preregalloc) Value stored in this register.
   231  	c *Value // A Value equal to v which is currently in a register.  Might be v or a copy of it.
   232  	// If a register is unused, v==c==nil
   233  }
   234  
   235  type regAllocState struct {
   236  	f *Func
   237  
   238  	sdom        SparseTree
   239  	registers   []Register
   240  	numRegs     register
   241  	SPReg       register
   242  	SBReg       register
   243  	GReg        register
   244  	allocatable regMask
   245  
   246  	// live values at the end of each block.  live[b.ID] is a list of value IDs
   247  	// which are live at the end of b, together with a count of how many instructions
   248  	// forward to the next use.
   249  	live [][]liveInfo
   250  	// desired register assignments at the end of each block.
   251  	// Note that this is a static map computed before allocation occurs. Dynamic
   252  	// register desires (from partially completed allocations) will trump
   253  	// this information.
   254  	desired []desiredState
   255  
   256  	// current state of each (preregalloc) Value
   257  	values []valState
   258  
   259  	// ID of SP, SB values
   260  	sp, sb ID
   261  
   262  	// For each Value, map from its value ID back to the
   263  	// preregalloc Value it was derived from.
   264  	orig []*Value
   265  
   266  	// current state of each register
   267  	regs []regState
   268  
   269  	// registers that contain values which can't be kicked out
   270  	nospill regMask
   271  
   272  	// mask of registers currently in use
   273  	used regMask
   274  
   275  	// mask of registers used since the start of the current block
   276  	usedSinceBlockStart regMask
   277  
   278  	// mask of registers used in the current instruction
   279  	tmpused regMask
   280  
   281  	// current block we're working on
   282  	curBlock *Block
   283  
   284  	// cache of use records
   285  	freeUseRecords *use
   286  
   287  	// endRegs[blockid] is the register state at the end of each block.
   288  	// encoded as a set of endReg records.
   289  	endRegs [][]endReg
   290  
   291  	// startRegs[blockid] is the register state at the start of merge blocks.
   292  	// saved state does not include the state of phi ops in the block.
   293  	startRegs [][]startReg
   294  
   295  	// startRegsMask is a mask of the registers in startRegs[curBlock.ID].
   296  	// Registers dropped from startRegsMask are later synchronized back to
   297  	// startRegs by dropping from there as well.
   298  	startRegsMask regMask
   299  
   300  	// spillLive[blockid] is the set of live spills at the end of each block
   301  	spillLive [][]ID
   302  
   303  	// a set of copies we generated to move things around, and
   304  	// whether it is used in shuffle. Unused copies will be deleted.
   305  	copies map[*Value]bool
   306  
   307  	loopnest *loopnest
   308  
   309  	// choose a good order in which to visit blocks for allocation purposes.
   310  	visitOrder []*Block
   311  
   312  	// blockOrder[b.ID] corresponds to the index of block b in visitOrder.
   313  	blockOrder []int32
   314  
   315  	// whether to insert instructions that clobber dead registers at call sites
   316  	doClobber bool
   317  }
   318  
   319  type endReg struct {
   320  	r register
   321  	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
   322  	c *Value // cached version of the value
   323  }
   324  
   325  type startReg struct {
   326  	r   register
   327  	v   *Value   // pre-regalloc value needed in this register
   328  	c   *Value   // cached version of the value
   329  	pos src.XPos // source position of use of this register
   330  }
   331  
   332  // freeReg frees up register r. Any current user of r is kicked out.
   333  func (s *regAllocState) freeReg(r register) {
   334  	v := s.regs[r].v
   335  	if v == nil {
   336  		s.f.Fatalf("tried to free an already free register %d\n", r)
   337  	}
   338  
   339  	// Mark r as unused.
   340  	if s.f.pass.debug > regDebug {
   341  		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
   342  	}
   343  	s.regs[r] = regState{}
   344  	s.values[v.ID].regs &^= regMask(1) << r
   345  	s.used &^= regMask(1) << r
   346  }
   347  
   348  // freeRegs frees up all registers listed in m.
   349  func (s *regAllocState) freeRegs(m regMask) {
   350  	for m&s.used != 0 {
   351  		s.freeReg(pickReg(m & s.used))
   352  	}
   353  }
   354  
   355  // clobberRegs inserts instructions that clobber registers listed in m.
   356  func (s *regAllocState) clobberRegs(m regMask) {
   357  	m &= s.allocatable & s.f.Config.gpRegMask // only integer registers can contain pointers; clobber only those
   358  	for m != 0 {
   359  		r := pickReg(m)
   360  		m &^= 1 << r
   361  		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
   362  		s.f.setHome(x, &s.registers[r])
   363  	}
   364  }
   365  
   366  // setOrig records that c's original value is the same as
   367  // v's original value.
   368  func (s *regAllocState) setOrig(c *Value, v *Value) {
   369  	if int(c.ID) >= cap(s.orig) {
   370  		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
   371  		copy(x, s.orig)
   372  		s.f.Cache.freeValueSlice(s.orig)
   373  		s.orig = x
   374  	}
   375  	for int(c.ID) >= len(s.orig) {
   376  		s.orig = append(s.orig, nil)
   377  	}
   378  	if s.orig[c.ID] != nil {
   379  		s.f.Fatalf("orig value set twice %s %s", c, v)
   380  	}
   381  	s.orig[c.ID] = s.orig[v.ID]
   382  }
   383  
   384  // assignReg assigns register r to hold c, a copy of v.
   385  // r must be unused.
   386  func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
   387  	if s.f.pass.debug > regDebug {
   388  		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
   389  	}
   390  	if s.regs[r].v != nil {
   391  		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
   392  	}
   393  
   394  	// Update state.
   395  	s.regs[r] = regState{v, c}
   396  	s.values[v.ID].regs |= regMask(1) << r
   397  	s.used |= regMask(1) << r
   398  	s.f.setHome(c, &s.registers[r])
   399  }
   400  
   401  // allocReg chooses a register from the set of registers in mask.
   402  // If there is no unused register, a Value will be kicked out of
   403  // a register to make room.
   404  func (s *regAllocState) allocReg(mask regMask, v *Value) register {
   405  	if v.OnWasmStack {
   406  		return noRegister
   407  	}
   408  
   409  	mask &= s.allocatable
   410  	mask &^= s.nospill
   411  	if mask == 0 {
   412  		s.f.Fatalf("no register available for %s", v.LongString())
   413  	}
   414  
   415  	// Pick an unused register if one is available.
   416  	if mask&^s.used != 0 {
   417  		r := pickReg(mask &^ s.used)
   418  		s.usedSinceBlockStart |= regMask(1) << r
   419  		return r
   420  	}
   421  
   422  	// Pick a value to spill. Spill the value with the
   423  	// farthest-in-the-future use.
   424  	// TODO: Prefer registers with already spilled Values?
   425  	// TODO: Modify preference using affinity graph.
   426  	// TODO: if a single value is in multiple registers, spill one of them
   427  	// before spilling a value in just a single register.
   428  
   429  	// Find a register to spill. We spill the register containing the value
   430  	// whose next use is as far in the future as possible.
   431  	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
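        	// For example (illustrative): if the candidate registers hold values
        	// whose next uses are 2, 12, and 40 instructions away, we evict the
        	// register holding the value next used 40 instructions from now.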
   432  	var r register
   433  	maxuse := int32(-1)
   434  	for t := register(0); t < s.numRegs; t++ {
   435  		if mask>>t&1 == 0 {
   436  			continue
   437  		}
   438  		v := s.regs[t].v
   439  		if n := s.values[v.ID].uses.dist; n > maxuse {
   440  			// v's next use is farther in the future than any value
   441  			// we've seen so far. A new best spill candidate.
   442  			r = t
   443  			maxuse = n
   444  		}
   445  	}
   446  	if maxuse == -1 {
   447  		s.f.Fatalf("couldn't find register to spill")
   448  	}
   449  
   450  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   451  		// TODO(neelance): In theory this should never happen, because all wasm registers are equal.
   452  		// So if there is still a free register, the allocation should have picked that one in the first place instead of
   453  		// trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization.
   454  		s.freeReg(r)
   455  		return r
   456  	}
   457  
   458  	// Try to move it around before kicking out, if there is a free register.
   459  	// We generate a Copy and record it. It will be deleted if never used.
   460  	v2 := s.regs[r].v
   461  	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
   462  	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
   463  		s.usedSinceBlockStart |= regMask(1) << r
   464  		r2 := pickReg(m)
   465  		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
   466  		s.copies[c] = false
   467  		if s.f.pass.debug > regDebug {
   468  			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
   469  		}
   470  		s.setOrig(c, v2)
   471  		s.assignReg(r2, v2, c)
   472  	}
   473  
   474  	// If the evicted register isn't used between the start of the block
   475  	// and now then there is no reason to even request it on entry. We can
   476  	// drop from startRegs in that case.
   477  	if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
   478  		if s.startRegsMask&(regMask(1)<<r) != 0 {
   479  			if s.f.pass.debug > regDebug {
   480  				fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
   481  			}
   482  			s.startRegsMask &^= regMask(1) << r
   483  		}
   484  	}
   485  
   486  	s.freeReg(r)
   487  	s.usedSinceBlockStart |= regMask(1) << r
   488  	return r
   489  }
   490  
   491  // makeSpill returns a Value which represents the spilled value of v.
   492  // b is the block in which the spill is used.
   493  func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
   494  	vi := &s.values[v.ID]
   495  	if vi.spill != nil {
   496  		// Final block not known - keep track of subtree where restores reside.
   497  		vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry)
   498  		vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit)
   499  		return vi.spill
   500  	}
   501  	// Make a spill for v. We don't know where we want
   502  	// to put it yet, so we leave it blockless for now.
   503  	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
   504  	// We also don't know what the spill's arg will be.
   505  	// Leave it argless for now.
   506  	s.setOrig(spill, v)
   507  	vi.spill = spill
   508  	vi.restoreMin = s.sdom[b.ID].entry
   509  	vi.restoreMax = s.sdom[b.ID].exit
   510  	return spill
   511  }
   512  
   513  // allocValToReg allocates v to a register selected from regMask and
   514  // returns the register copy of v. Any previous user is kicked out and spilled
   515  // (if necessary). Load code is added at the current pc. If nospill is set the
   516  // allocated register is marked nospill so the assignment cannot be
   517  // undone until the caller allows it by clearing nospill. Returns a
   518  // *Value which is either v or a copy of v allocated to the chosen register.
   519  func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
   520  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
   521  		c := v.copyIntoWithXPos(s.curBlock, pos)
   522  		c.OnWasmStack = true
   523  		s.setOrig(c, v)
   524  		return c
   525  	}
   526  	if v.OnWasmStack {
   527  		return v
   528  	}
   529  
   530  	vi := &s.values[v.ID]
   531  	pos = pos.WithNotStmt()
   532  	// Check if v is already in a requested register.
   533  	if mask&vi.regs != 0 {
   534  		r := pickReg(mask & vi.regs)
   535  		if s.regs[r].v != v || s.regs[r].c == nil {
   536  			panic("bad register state")
   537  		}
   538  		if nospill {
   539  			s.nospill |= regMask(1) << r
   540  		}
   541  		s.usedSinceBlockStart |= regMask(1) << r
   542  		return s.regs[r].c
   543  	}
   544  
   545  	var r register
   546  	// If nospill is set, the value is used immediately, so it can live on the WebAssembly stack.
   547  	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
   548  	if !onWasmStack {
   549  		// Allocate a register.
   550  		r = s.allocReg(mask, v)
   551  	}
   552  
   553  	// Allocate v to the new register.
   554  	var c *Value
   555  	if vi.regs != 0 {
   556  		// Copy from a register that v is already in.
   557  		r2 := pickReg(vi.regs)
   558  		if s.regs[r2].v != v {
   559  			panic("bad register state")
   560  		}
   561  		s.usedSinceBlockStart |= regMask(1) << r2
   562  		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
   563  	} else if v.rematerializeable() {
   564  		// Rematerialize instead of loading from the spill location.
   565  		c = v.copyIntoWithXPos(s.curBlock, pos)
   566  	} else {
   567  		// Load v from its spill location.
   568  		spill := s.makeSpill(v, s.curBlock)
   569  		if s.f.pass.debug > logSpills {
   570  			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
   571  		}
   572  		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
   573  	}
   574  
   575  	s.setOrig(c, v)
   576  
   577  	if onWasmStack {
   578  		c.OnWasmStack = true
   579  		return c
   580  	}
   581  
   582  	s.assignReg(r, v, c)
   583  	if c.Op == OpLoadReg && s.isGReg(r) {
   584  		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
   585  	}
   586  	if nospill {
   587  		s.nospill |= regMask(1) << r
   588  	}
   589  	return c
   590  }
   591  
   593  // isLeaf reports whether f performs no calls (tail calls are not counted).
   593  func isLeaf(f *Func) bool {
   594  	for _, b := range f.Blocks {
   595  		for _, v := range b.Values {
   596  			if v.Op.IsCall() && !v.Op.IsTailCall() {
   597  				// tail call is not counted as it does not save the return PC or need a frame
   598  				return false
   599  			}
   600  		}
   601  	}
   602  	return true
   603  }
   604  
   605  // needRegister reports whether v needs a register.
   606  func (v *Value) needRegister() bool {
   607  	return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
   608  }
   609  
   610  func (s *regAllocState) init(f *Func) {
   611  	s.f = f
   612  	s.f.RegAlloc = s.f.Cache.locs[:0]
   613  	s.registers = f.Config.registers
   614  	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
   615  		s.f.Fatalf("bad number of registers: %d", nr)
   616  	} else {
   617  		s.numRegs = register(nr)
   618  	}
   619  	// Locate SP, SB, and g registers.
   620  	s.SPReg = noRegister
   621  	s.SBReg = noRegister
   622  	s.GReg = noRegister
   623  	for r := register(0); r < s.numRegs; r++ {
   624  		switch s.registers[r].String() {
   625  		case "SP":
   626  			s.SPReg = r
   627  		case "SB":
   628  			s.SBReg = r
   629  		case "g":
   630  			s.GReg = r
   631  		}
   632  	}
   633  	// Make sure we found all required registers.
   634  	switch noRegister {
   635  	case s.SPReg:
   636  		s.f.Fatalf("no SP register found")
   637  	case s.SBReg:
   638  		s.f.Fatalf("no SB register found")
   639  	case s.GReg:
   640  		if f.Config.hasGReg {
   641  			s.f.Fatalf("no g register found")
   642  		}
   643  	}
   644  
   645  	// Figure out which registers we're allowed to use.
   646  	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
   647  	s.allocatable &^= 1 << s.SPReg
   648  	s.allocatable &^= 1 << s.SBReg
   649  	if s.f.Config.hasGReg {
   650  		s.allocatable &^= 1 << s.GReg
   651  	}
   652  	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
   653  		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
   654  	}
   655  	if s.f.Config.LinkReg != -1 {
   656  		if isLeaf(f) {
   657  			// Leaf functions don't save/restore the link register.
   658  			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
   659  		}
   660  	}
   661  	if s.f.Config.ctxt.Flag_dynlink {
   662  		switch s.f.Config.arch {
   663  		case "386":
   664  			// nothing to do.
   665  			// Note that for Flag_shared (position independent code)
   666  			// we do need to be careful, but that carefulness is hidden
   667  			// in the rewrite rules so we always have a free register
   668  			// available for global load/stores. See _gen/386.rules (search for Flag_shared).
   669  		case "amd64":
   670  			s.allocatable &^= 1 << 15 // R15
   671  		case "arm":
   672  			s.allocatable &^= 1 << 9 // R9
   673  		case "arm64":
   674  			// nothing to do
   675  		case "loong64": // R2 (aka TP) already reserved.
   676  			// nothing to do
   677  		case "ppc64le": // R2 already reserved.
   678  			// nothing to do
   679  		case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved.
   680  			// nothing to do
   681  		case "s390x":
   682  			s.allocatable &^= 1 << 11 // R11
   683  		default:
   684  			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
   685  		}
   686  	}
   687  
   688  	// Linear scan register allocation can be influenced by the order in which blocks appear.
   689  	// Decouple the register allocation order from the generated block order.
   690  	// This also creates an opportunity for experiments to find a better order.
   691  	s.visitOrder = layoutRegallocOrder(f)
   692  
   693  	// Compute block order. This array allows us to distinguish forward edges
   694  	// from backward edges and compute how far they go.
   695  	s.blockOrder = make([]int32, f.NumBlocks())
   696  	for i, b := range s.visitOrder {
   697  		s.blockOrder[b.ID] = int32(i)
   698  	}
   699  
   700  	s.regs = make([]regState, s.numRegs)
   701  	nv := f.NumValues()
   702  	if cap(s.f.Cache.regallocValues) >= nv {
   703  		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
   704  	} else {
   705  		s.f.Cache.regallocValues = make([]valState, nv)
   706  	}
   707  	s.values = s.f.Cache.regallocValues
   708  	s.orig = s.f.Cache.allocValueSlice(nv)
   709  	s.copies = make(map[*Value]bool)
   710  	for _, b := range s.visitOrder {
   711  		for _, v := range b.Values {
   712  			if v.needRegister() {
   713  				s.values[v.ID].needReg = true
   714  				s.values[v.ID].rematerializeable = v.rematerializeable()
   715  				s.orig[v.ID] = v
   716  			}
   717  			// Note: needReg is false for values returning Tuple types.
   718  			// Instead, we mark the corresponding Selects as needReg.
   719  		}
   720  	}
   721  	s.computeLive()
   722  
   723  	s.endRegs = make([][]endReg, f.NumBlocks())
   724  	s.startRegs = make([][]startReg, f.NumBlocks())
   725  	s.spillLive = make([][]ID, f.NumBlocks())
   726  	s.sdom = f.Sdom()
   727  
   728  	// wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack.
   729  	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   730  		canLiveOnStack := f.newSparseSet(f.NumValues())
   731  		defer f.retSparseSet(canLiveOnStack)
   732  		for _, b := range f.Blocks {
   733  			// New block. Clear candidate set.
   734  			canLiveOnStack.clear()
   735  			for _, c := range b.ControlValues() {
   736  				if c.Uses == 1 && !opcodeTable[c.Op].generic {
   737  					canLiveOnStack.add(c.ID)
   738  				}
   739  			}
   740  			// Walking backwards.
   741  			for i := len(b.Values) - 1; i >= 0; i-- {
   742  				v := b.Values[i]
   743  				if canLiveOnStack.contains(v.ID) {
   744  					v.OnWasmStack = true
   745  				} else {
   746  					// Value can not live on stack. Values are not allowed to be reordered, so clear candidate set.
   747  					canLiveOnStack.clear()
   748  				}
   749  				for _, arg := range v.Args {
   750  					// Value can live on the stack if:
   751  					// - it is only used once
   752  					// - it is used in the same basic block
   753  					// - it is not a "mem" value
   754  					// - it is a WebAssembly op
   755  					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
   756  						canLiveOnStack.add(arg.ID)
   757  					}
   758  				}
   759  			}
   760  		}
   761  	}
   762  
   763  	// The clobberdeadreg experiment inserts code to clobber dead registers
   764  	// at call sites.
   765  	// Ignore huge functions to avoid doing too much work.
   766  	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
   767  		// TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH.
   768  		s.doClobber = true
   769  	}
   770  }
   771  
   772  func (s *regAllocState) close() {
   773  	s.f.Cache.freeValueSlice(s.orig)
   774  }
   775  
   776  // addUse adds a use record for id at distance dist from the start of the block.
   777  // All calls to addUse must happen with nonincreasing dist.
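        // For example, addUse(id, 7, p), addUse(id, 5, p), addUse(id, 5, p)
        // leaves s.values[id].uses as the list 5 -> 5 -> 7.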
   778  func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
   779  	r := s.freeUseRecords
   780  	if r != nil {
   781  		s.freeUseRecords = r.next
   782  	} else {
   783  		r = &use{}
   784  	}
   785  	r.dist = dist
   786  	r.pos = pos
   787  	r.next = s.values[id].uses
   788  	s.values[id].uses = r
   789  	if r.next != nil && dist > r.next.dist {
   790  		s.f.Fatalf("uses added in wrong order")
   791  	}
   792  }
   793  
   794  // advanceUses advances the uses of v's args from the state before v to the state after v.
   795  // Any values which have no more uses are deallocated from registers.
   796  func (s *regAllocState) advanceUses(v *Value) {
   797  	for _, a := range v.Args {
   798  		if !s.values[a.ID].needReg {
   799  			continue
   800  		}
   801  		ai := &s.values[a.ID]
   802  		r := ai.uses
   803  		ai.uses = r.next
   804  		if r.next == nil {
   805  			// Value is dead, free all registers that hold it.
   806  			s.freeRegs(ai.regs)
   807  		}
   808  		r.next = s.freeUseRecords
   809  		s.freeUseRecords = r
   810  	}
   811  }
   812  
   813  // liveAfterCurrentInstruction reports whether v is live after
   814  // the current instruction is completed.  v must be used by the
   815  // current instruction.
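        // For example, if v's remaining use distances are [5, 5, 9], both uses at
        // distance 5 belong to the current instruction and the use at distance 9
        // keeps v live; if they are just [5, 5], v is dead after this instruction.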
   816  func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
   817  	u := s.values[v.ID].uses
   818  	if u == nil {
   819  		panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
   820  	}
   821  	d := u.dist
   822  	for u != nil && u.dist == d {
   823  		u = u.next
   824  	}
   825  	return u != nil && u.dist > d
   826  }
   827  
   828  // setState sets the state of the registers to that encoded in regs.
   829  func (s *regAllocState) setState(regs []endReg) {
   830  	s.freeRegs(s.used)
   831  	for _, x := range regs {
   832  		s.assignReg(x.r, x.v, x.c)
   833  	}
   834  }
   835  
   836  // compatRegs returns the set of registers which can store a type t.
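        // For example, integer and pointer types map to gpRegMask, and float types
        // map to fpRegMask (or to the fp32/fp64 variants on configs that define
        // them); the result is always intersected with s.allocatable.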
   837  func (s *regAllocState) compatRegs(t *types.Type) regMask {
   838  	var m regMask
   839  	if t.IsTuple() || t.IsFlags() {
   840  		return 0
   841  	}
   842  	if t.IsFloat() || t == types.TypeInt128 {
   843  		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
   844  			m = s.f.Config.fp32RegMask
   845  		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
   846  			m = s.f.Config.fp64RegMask
   847  		} else {
   848  			m = s.f.Config.fpRegMask
   849  		}
   850  	} else {
   851  		m = s.f.Config.gpRegMask
   852  	}
   853  	return m & s.allocatable
   854  }
   855  
   856  // regspec returns the regInfo for operation op.
   857  func (s *regAllocState) regspec(v *Value) regInfo {
   858  	op := v.Op
   859  	if op == OpConvert {
   860  		// OpConvert is a generic op, so it doesn't have a
   861  		// register set in the static table. It can use any
   862  		// allocatable integer register.
   863  		m := s.allocatable & s.f.Config.gpRegMask
   864  		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
   865  	}
   866  	if op == OpArgIntReg {
   867  		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
   868  		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
   869  	}
   870  	if op == OpArgFloatReg {
   871  		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
   872  		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
   873  	}
   874  	if op.IsCall() {
   875  		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
   876  			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
   877  		}
   878  	}
   879  	if op == OpMakeResult && s.f.OwnAux.reg != nil {
   880  		return *s.f.OwnAux.ResultReg(s.f.Config)
   881  	}
   882  	return opcodeTable[op].reg
   883  }
   884  
   885  func (s *regAllocState) isGReg(r register) bool {
   886  	return s.f.Config.hasGReg && s.GReg == r
   887  }
   888  
   889  // tmpVal is a dummy Value used to represent the value being held in a temporary register.
   890  var tmpVal Value
   891  
   892  func (s *regAllocState) regalloc(f *Func) {
   893  	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
   894  	defer f.retSparseSet(regValLiveSet)
   895  	var oldSched []*Value
   896  	var phis []*Value
   897  	var phiRegs []register
   898  	var args []*Value
   899  
   900  	// Data structure used for computing desired registers.
   901  	var desired desiredState
   902  
   903  	// Desired registers for inputs & outputs for each instruction in the block.
   904  	type dentry struct {
   905  		out [4]register    // desired output registers
   906  		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
   907  	}
   908  	var dinfo []dentry
   909  
   910  	if f.Entry != f.Blocks[0] {
   911  		f.Fatalf("entry block must be first")
   912  	}
   913  
   914  	for _, b := range s.visitOrder {
   915  		if s.f.pass.debug > regDebug {
   916  			fmt.Printf("Begin processing block %v\n", b)
   917  		}
   918  		s.curBlock = b
   919  		s.startRegsMask = 0
   920  		s.usedSinceBlockStart = 0
   921  
   922  		// Initialize regValLiveSet and uses fields for this block.
   923  		// Walk backwards through the block doing liveness analysis.
   924  		regValLiveSet.clear()
   925  		for _, e := range s.live[b.ID] {
   926  			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
   927  			regValLiveSet.add(e.ID)
   928  		}
   929  		for _, v := range b.ControlValues() {
   930  			if s.values[v.ID].needReg {
   931  				s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values
   932  				regValLiveSet.add(v.ID)
   933  			}
   934  		}
   935  		for i := len(b.Values) - 1; i >= 0; i-- {
   936  			v := b.Values[i]
   937  			regValLiveSet.remove(v.ID)
   938  			if v.Op == OpPhi {
   939  				// Remove v from the live set, but don't add
   940  				// any inputs. This is the state the len(b.Preds)>1
   941  				// case below desires; it wants to process phis specially.
   942  				continue
   943  			}
   944  			if opcodeTable[v.Op].call {
   945  				// Function call clobbers all the registers but SP and SB.
   946  				regValLiveSet.clear()
   947  				if s.sp != 0 && s.values[s.sp].uses != nil {
   948  					regValLiveSet.add(s.sp)
   949  				}
   950  				if s.sb != 0 && s.values[s.sb].uses != nil {
   951  					regValLiveSet.add(s.sb)
   952  				}
   953  			}
   954  			for _, a := range v.Args {
   955  				if !s.values[a.ID].needReg {
   956  					continue
   957  				}
   958  				s.addUse(a.ID, int32(i), v.Pos)
   959  				regValLiveSet.add(a.ID)
   960  			}
   961  		}
   962  		if s.f.pass.debug > regDebug {
   963  			fmt.Printf("use distances for %s\n", b)
   964  			for i := range s.values {
   965  				vi := &s.values[i]
   966  				u := vi.uses
   967  				if u == nil {
   968  					continue
   969  				}
   970  				fmt.Printf("  v%d:", i)
   971  				for u != nil {
   972  					fmt.Printf(" %d", u.dist)
   973  					u = u.next
   974  				}
   975  				fmt.Println()
   976  			}
   977  		}
   978  
   979  		// Make a copy of the block schedule so we can generate a new one in place.
   980  		// We make a separate copy for phis and regular values.
   981  		nphi := 0
   982  		for _, v := range b.Values {
   983  			if v.Op != OpPhi {
   984  				break
   985  			}
   986  			nphi++
   987  		}
   988  		phis = append(phis[:0], b.Values[:nphi]...)
   989  		oldSched = append(oldSched[:0], b.Values[nphi:]...)
   990  		b.Values = b.Values[:0]
   991  
   992  		// Initialize start state of block.
   993  		if b == f.Entry {
   994  			// Regalloc state is empty to start.
   995  			if nphi > 0 {
   996  				f.Fatalf("phis in entry block")
   997  			}
   998  		} else if len(b.Preds) == 1 {
   999  			// Start regalloc state with the end state of the previous block.
  1000  			s.setState(s.endRegs[b.Preds[0].b.ID])
  1001  			if nphi > 0 {
  1002  				f.Fatalf("phis in single-predecessor block")
  1003  			}
  1004  			// Drop any values which are no longer live.
  1005  			// This may happen because at the end of p, a value may be
  1006  			// live but only used by some other successor of p.
  1007  			for r := register(0); r < s.numRegs; r++ {
  1008  				v := s.regs[r].v
  1009  				if v != nil && !regValLiveSet.contains(v.ID) {
  1010  					s.freeReg(r)
  1011  				}
  1012  			}
  1013  		} else {
  1014  			// This is the complicated case. We have more than one predecessor,
  1015  			// which means we may have Phi ops.
  1016  
  1017  			// Start with the final register state of the predecessor with least spill values.
  1018  			// This is based on the following points:
  1019  			// 1. Fewer live spills indicate lower register pressure along that
  1020  			//    path, so this block's values are more likely to be allocated
  1021  			//    to registers.
  1022  			// 2. Avoid predecessors containing function calls: a call usually
  1023  			//    generates many spills and loses the previous allocation state.
  1024  			// TODO: Improve this part. At least the size of endRegs of the predecessor also has
  1025  			// an impact on the code size and compiler speed. But it is not easy to find a simple
  1026  			// and efficient method that combines multiple factors.
  1027  			idx := -1
  1028  			for i, p := range b.Preds {
  1029  				// If the predecessor has not been visited yet, skip it because its end state
  1030  				// (endRegs and spillLive) has not been computed yet.
  1031  				pb := p.b
  1032  				if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
  1033  					continue
  1034  				}
  1035  				if idx == -1 {
  1036  					idx = i
  1037  					continue
  1038  				}
  1039  				pSel := b.Preds[idx].b
  1040  				if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
  1041  					idx = i
  1042  				} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
  1043  					// Use a bit of branch-likeliness information. After the critical pass,
  1044  					// pb and pSel must be plain blocks, so check edge pb->pb.Preds instead of edge pb->b.
  1045  					// TODO: improve the prediction of the likely predecessor. The following
  1046  					// method is only suitable for the simplest cases. For complex cases,
  1047  					// the prediction may be inaccurate, but this does not affect the
  1048  					// correctness of the program.
  1049  					// According to the layout algorithm, the predecessor with the
  1050  					// smaller blockOrder is the true branch, and the test results show
  1051  					// that it is better to choose the predecessor with a smaller
  1052  					// blockOrder than to make no choice.
  1053  					if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
  1054  						idx = i
  1055  					}
  1056  				}
  1057  			}
  1058  			if idx < 0 {
  1059  				f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
  1060  			}
  1061  			p := b.Preds[idx].b
  1062  			s.setState(s.endRegs[p.ID])
  1063  
  1064  			if s.f.pass.debug > regDebug {
  1065  				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
  1066  				for _, x := range s.endRegs[p.ID] {
  1067  					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
  1068  				}
  1069  			}
  1070  
  1071  			// Decide on registers for phi ops. Use the registers determined
  1072  			// by the primary predecessor if we can.
  1073  			// TODO: pick best of (already processed) predecessors?
  1074  			// Majority vote? Deepest nesting level?
  1075  			phiRegs = phiRegs[:0]
  1076  			var phiUsed regMask
  1077  
  1078  			for _, v := range phis {
  1079  				if !s.values[v.ID].needReg {
  1080  					phiRegs = append(phiRegs, noRegister)
  1081  					continue
  1082  				}
  1083  				a := v.Args[idx]
  1084  				// Some instructions target not-allocatable registers.
  1085  				// They're not suitable for further (phi-function) allocation.
  1086  				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
  1087  				if m != 0 {
  1088  					r := pickReg(m)
  1089  					phiUsed |= regMask(1) << r
  1090  					phiRegs = append(phiRegs, r)
  1091  				} else {
  1092  					phiRegs = append(phiRegs, noRegister)
  1093  				}
  1094  			}
  1095  
  1096  			// Second pass - deallocate all in-register phi inputs.
  1097  			for i, v := range phis {
  1098  				if !s.values[v.ID].needReg {
  1099  					continue
  1100  				}
  1101  				a := v.Args[idx]
  1102  				r := phiRegs[i]
  1103  				if r == noRegister {
  1104  					continue
  1105  				}
  1106  				if regValLiveSet.contains(a.ID) {
  1107  					// Input value is still live (it is used by something other than Phi).
  1108  					// Try to move it around before kicking out, if there is a free register.
  1109  					// We generate a Copy in the predecessor block and record it. It will be
  1110  					// deleted later if never used.
  1111  					//
  1112  					// Pick a free register. At this point some registers used in the predecessor
  1113  					// block may have been deallocated. Those are the ones used for Phis. Exclude
  1114  					// them (and they are not going to be helpful anyway).
  1115  					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
  1116  					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
  1117  						r2 := pickReg(m)
  1118  						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
  1119  						s.copies[c] = false
  1120  						if s.f.pass.debug > regDebug {
  1121  							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
  1122  						}
  1123  						s.setOrig(c, a)
  1124  						s.assignReg(r2, a, c)
  1125  						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
  1126  					}
  1127  				}
  1128  				s.freeReg(r)
  1129  			}
  1130  
  1131  			// Copy phi ops into new schedule.
  1132  			b.Values = append(b.Values, phis...)
  1133  
  1134  			// Third pass - pick registers for phis whose input
  1135  			// was not in a register in the primary predecessor.
  1136  			for i, v := range phis {
  1137  				if !s.values[v.ID].needReg {
  1138  					continue
  1139  				}
  1140  				if phiRegs[i] != noRegister {
  1141  					continue
  1142  				}
  1143  				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
  1144  				// If one of the other inputs of v is in a register, and the register is available,
  1145  				// select this register, which can save some unnecessary copies.
  1146  				for i, pe := range b.Preds {
  1147  					if i == idx {
  1148  						continue
  1149  					}
  1150  					ri := noRegister
  1151  					for _, er := range s.endRegs[pe.b.ID] {
  1152  						if er.v == s.orig[v.Args[i].ID] {
  1153  							ri = er.r
  1154  							break
  1155  						}
  1156  					}
  1157  					if ri != noRegister && m>>ri&1 != 0 {
  1158  						m = regMask(1) << ri
  1159  						break
  1160  					}
  1161  				}
  1162  				if m != 0 {
  1163  					r := pickReg(m)
  1164  					phiRegs[i] = r
  1165  					phiUsed |= regMask(1) << r
  1166  				}
  1167  			}
  1168  
  1169  			// Set registers for phis. Add phi spill code.
  1170  			for i, v := range phis {
  1171  				if !s.values[v.ID].needReg {
  1172  					continue
  1173  				}
  1174  				r := phiRegs[i]
  1175  				if r == noRegister {
  1176  					// stack-based phi
  1177  					// Spills will be inserted in all the predecessors below.
  1178  					s.values[v.ID].spill = v // v starts life spilled
  1179  					continue
  1180  				}
  1181  				// register-based phi
  1182  				s.assignReg(r, v, v)
  1183  			}
  1184  
  1185  			// Deallocate any values which are no longer live. Phis are excluded.
  1186  			for r := register(0); r < s.numRegs; r++ {
  1187  				if phiUsed>>r&1 != 0 {
  1188  					continue
  1189  				}
  1190  				v := s.regs[r].v
  1191  				if v != nil && !regValLiveSet.contains(v.ID) {
  1192  					s.freeReg(r)
  1193  				}
  1194  			}
  1195  
  1196  			// Save the starting state for use by merge edges.
  1197  			// We append to a stack allocated variable that we'll
  1198  			// later copy into s.startRegs in one fell swoop, to save
  1199  			// on allocations.
  1200  			regList := make([]startReg, 0, 32)
  1201  			for r := register(0); r < s.numRegs; r++ {
  1202  				v := s.regs[r].v
  1203  				if v == nil {
  1204  					continue
  1205  				}
  1206  				if phiUsed>>r&1 != 0 {
  1207  					// Skip registers that phis used, we'll handle those
  1208  					// specially during merge edge processing.
  1209  					continue
  1210  				}
  1211  				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
  1212  				s.startRegsMask |= regMask(1) << r
  1213  			}
  1214  			s.startRegs[b.ID] = make([]startReg, len(regList))
  1215  			copy(s.startRegs[b.ID], regList)
  1216  
  1217  			if s.f.pass.debug > regDebug {
  1218  				fmt.Printf("after phis\n")
  1219  				for _, x := range s.startRegs[b.ID] {
  1220  					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
  1221  				}
  1222  			}
  1223  		}
  1224  
  1225  		// Allocate space to record the desired registers for each value.
  1226  		if l := len(oldSched); cap(dinfo) < l {
  1227  			dinfo = make([]dentry, l)
  1228  		} else {
  1229  			dinfo = dinfo[:l]
  1230  			for i := range dinfo {
  1231  				dinfo[i] = dentry{}
  1232  			}
  1233  		}
  1234  
  1235  		// Load static desired register info at the end of the block.
  1236  		desired.copy(&s.desired[b.ID])
  1237  
  1238  		// Check actual assigned registers at the start of the next block(s).
  1239  		// Dynamically assigned registers will trump the static
  1240  		// desired registers computed during liveness analysis.
  1241  		// Note that we do this phase after startRegs is set above, so that
  1242  		// we get the right behavior for a block which branches to itself.
  1243  		for _, e := range b.Succs {
  1244  			succ := e.b
  1245  			// TODO: prioritize likely successor?
  1246  			for _, x := range s.startRegs[succ.ID] {
  1247  				desired.add(x.v.ID, x.r)
  1248  			}
  1249  			// Process phi ops in succ.
  1250  			pidx := e.i
  1251  			for _, v := range succ.Values {
  1252  				if v.Op != OpPhi {
  1253  					break
  1254  				}
  1255  				if !s.values[v.ID].needReg {
  1256  					continue
  1257  				}
  1258  				rp, ok := s.f.getHome(v.ID).(*Register)
  1259  				if !ok {
  1260  					// If v is not assigned a register, pick a register assigned to one of v's inputs.
  1261  					// Hopefully v will get assigned that register later.
  1262  					// If the inputs have allocated register information, add it to desired,
  1263  					// which may reduce spill or copy operations when the register is available.
  1264  					for _, a := range v.Args {
  1265  						rp, ok = s.f.getHome(a.ID).(*Register)
  1266  						if ok {
  1267  							break
  1268  						}
  1269  					}
  1270  					if !ok {
  1271  						continue
  1272  					}
  1273  				}
  1274  				desired.add(v.Args[pidx].ID, register(rp.num))
  1275  			}
  1276  		}
  1277  		// Walk values backwards computing desired register info.
  1278  		// See computeLive for more comments.
  1279  		for i := len(oldSched) - 1; i >= 0; i-- {
  1280  			v := oldSched[i]
  1281  			prefs := desired.remove(v.ID)
  1282  			regspec := s.regspec(v)
  1283  			desired.clobber(regspec.clobbers)
  1284  			for _, j := range regspec.inputs {
  1285  				if countRegs(j.regs) != 1 {
  1286  					continue
  1287  				}
  1288  				desired.clobber(j.regs)
  1289  				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  1290  			}
  1291  			if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  1292  				if opcodeTable[v.Op].commutative {
  1293  					desired.addList(v.Args[1].ID, prefs)
  1294  				}
  1295  				desired.addList(v.Args[0].ID, prefs)
  1296  			}
  1297  			// Save desired registers for this value.
  1298  			dinfo[i].out = prefs
  1299  			for j, a := range v.Args {
  1300  				if j >= len(dinfo[i].in) {
  1301  					break
  1302  				}
  1303  				dinfo[i].in[j] = desired.get(a.ID)
  1304  			}
  1305  		}
  1306  
  1307  		// Process all the non-phi values.
  1308  		for idx, v := range oldSched {
  1309  			tmpReg := noRegister
  1310  			if s.f.pass.debug > regDebug {
  1311  				fmt.Printf("  processing %s\n", v.LongString())
  1312  			}
  1313  			regspec := s.regspec(v)
  1314  			if v.Op == OpPhi {
  1315  				f.Fatalf("phi %s not at start of block", v)
  1316  			}
  1317  			if v.Op == OpSP {
  1318  				s.assignReg(s.SPReg, v, v)
  1319  				b.Values = append(b.Values, v)
  1320  				s.advanceUses(v)
  1321  				s.sp = v.ID
  1322  				continue
  1323  			}
  1324  			if v.Op == OpSB {
  1325  				s.assignReg(s.SBReg, v, v)
  1326  				b.Values = append(b.Values, v)
  1327  				s.advanceUses(v)
  1328  				s.sb = v.ID
  1329  				continue
  1330  			}
  1331  			if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
  1332  				if s.values[v.ID].needReg {
  1333  					if v.Op == OpSelectN {
  1334  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
  1335  					} else {
  1336  						var i = 0
  1337  						if v.Op == OpSelect1 {
  1338  							i = 1
  1339  						}
  1340  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
  1341  					}
  1342  				}
  1343  				b.Values = append(b.Values, v)
  1344  				s.advanceUses(v)
  1345  				continue
  1346  			}
  1347  			if v.Op == OpGetG && s.f.Config.hasGReg {
  1348  				// use hardware g register
  1349  				if s.regs[s.GReg].v != nil {
  1350  					s.freeReg(s.GReg) // kick out the old value
  1351  				}
  1352  				s.assignReg(s.GReg, v, v)
  1353  				b.Values = append(b.Values, v)
  1354  				s.advanceUses(v)
  1355  				continue
  1356  			}
  1357  			if v.Op == OpArg {
  1358  				// Args are "pre-spilled" values. We don't allocate
  1359  				// any register here. We just set up the spill pointer to
  1360  				// point at itself and any later user will restore it to use it.
  1361  				s.values[v.ID].spill = v
  1362  				b.Values = append(b.Values, v)
  1363  				s.advanceUses(v)
  1364  				continue
  1365  			}
  1366  			if v.Op == OpKeepAlive {
  1367  				// Make sure the argument to v is still live here.
  1368  				s.advanceUses(v)
  1369  				a := v.Args[0]
  1370  				vi := &s.values[a.ID]
  1371  				if vi.regs == 0 && !vi.rematerializeable {
  1372  					// Use the spill location.
  1373  					// This forces later liveness analysis to make the
  1374  					// value live at this point.
  1375  					v.SetArg(0, s.makeSpill(a, b))
  1376  				} else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
  1377  					// Rematerializeable value with a gc.Node. This is the address of
  1378  					// a stack object (e.g. an LEAQ). Keep the object live.
  1379  					// Change it to VarLive, which is what plive expects for locals.
  1380  					v.Op = OpVarLive
  1381  					v.SetArgs1(v.Args[1])
  1382  					v.Aux = a.Aux
  1383  				} else {
  1384  					// In-register and rematerializeable values are already live.
  1385  					// These are typically rematerializeable constants like nil,
  1386  					// or values of a variable that were modified since the last call.
  1387  					v.Op = OpCopy
  1388  					v.SetArgs1(v.Args[1])
  1389  				}
  1390  				b.Values = append(b.Values, v)
  1391  				continue
  1392  			}
  1393  			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
  1394  				// No register allocation required (or none specified yet)
  1395  				if s.doClobber && v.Op.IsCall() {
  1396  					s.clobberRegs(regspec.clobbers)
  1397  				}
  1398  				s.freeRegs(regspec.clobbers)
  1399  				b.Values = append(b.Values, v)
  1400  				s.advanceUses(v)
  1401  				continue
  1402  			}
  1403  
  1404  			if s.values[v.ID].rematerializeable {
  1405  				// Value is rematerializeable, don't issue it here.
  1406  				// It will get issued just before each use (see
  1407  				// allocValueToReg).
  1408  				for _, a := range v.Args {
  1409  					a.Uses--
  1410  				}
  1411  				s.advanceUses(v)
  1412  				continue
  1413  			}
  1414  
  1415  			if s.f.pass.debug > regDebug {
  1416  				fmt.Printf("value %s\n", v.LongString())
  1417  				fmt.Printf("  out:")
  1418  				for _, r := range dinfo[idx].out {
  1419  					if r != noRegister {
  1420  						fmt.Printf(" %s", &s.registers[r])
  1421  					}
  1422  				}
  1423  				fmt.Println()
  1424  				for i := 0; i < len(v.Args) && i < 3; i++ {
  1425  					fmt.Printf("  in%d:", i)
  1426  					for _, r := range dinfo[idx].in[i] {
  1427  						if r != noRegister {
  1428  							fmt.Printf(" %s", &s.registers[r])
  1429  						}
  1430  					}
  1431  					fmt.Println()
  1432  				}
  1433  			}
  1434  
  1435  			// Move arguments to registers.
  1436  			// First, if an arg must be in a specific register and it is already
  1437  			// in place, keep it.
  1438  			args = append(args[:0], make([]*Value, len(v.Args))...)
  1439  			for i, a := range v.Args {
  1440  				if !s.values[a.ID].needReg {
  1441  					args[i] = a
  1442  				}
  1443  			}
  1444  			for _, i := range regspec.inputs {
  1445  				mask := i.regs
  1446  				if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
  1447  					args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1448  				}
  1449  			}
  1450  			// Then, if an arg must be in a specific register and that
  1451  			// register is free, allocate that one. Otherwise when processing
  1452  			// another input we may kick a value into the free register, which
  1453  			// then will be kicked out again.
   1454  			// This is a common case for arguments passed in registers
   1455  			// to function calls.
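         			// Sketch of the churn avoided (registers illustrative): suppose
         			// input 0 must be in AX (currently free) and input 1 may use any
         			// register. Handling input 1 first could park its value in AX,
         			// only for it to be evicted when input 0 claims AX. Claiming free
         			// fixed registers first avoids that extra shuffle.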
  1456  			for {
  1457  				freed := false
  1458  				for _, i := range regspec.inputs {
  1459  					if args[i.idx] != nil {
  1460  						continue // already allocated
  1461  					}
  1462  					mask := i.regs
  1463  					if countRegs(mask) == 1 && mask&^s.used != 0 {
  1464  						args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1465  						// If the input is in other registers that will be clobbered by v,
  1466  						// or the input is dead, free the registers. This may make room
  1467  						// for other inputs.
  1468  						oldregs := s.values[v.Args[i.idx].ID].regs
  1469  						if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
  1470  							s.freeRegs(oldregs &^ mask &^ s.nospill)
  1471  							freed = true
  1472  						}
  1473  					}
  1474  				}
  1475  				if !freed {
  1476  					break
  1477  				}
  1478  			}
  1479  			// Last, allocate remaining ones, in an ordering defined
  1480  			// by the register specification (most constrained first).
  1481  			for _, i := range regspec.inputs {
  1482  				if args[i.idx] != nil {
  1483  					continue // already allocated
  1484  				}
  1485  				mask := i.regs
  1486  				if mask&s.values[v.Args[i.idx].ID].regs == 0 {
  1487  					// Need a new register for the input.
  1488  					mask &= s.allocatable
  1489  					mask &^= s.nospill
   1490  					// Use desired register if available.
  1491  					if i.idx < 3 {
  1492  						for _, r := range dinfo[idx].in[i.idx] {
  1493  							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1494  								// Desired register is allowed and unused.
  1495  								mask = regMask(1) << r
  1496  								break
  1497  							}
  1498  						}
  1499  					}
  1500  					// Avoid registers we're saving for other values.
  1501  					if mask&^desired.avoid != 0 {
  1502  						mask &^= desired.avoid
  1503  					}
  1504  				}
  1505  				args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1506  			}
  1507  
  1508  			// If the output clobbers the input register, make sure we have
  1509  			// at least two copies of the input register so we don't
  1510  			// have to reload the value from the spill location.
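         			// Sketch for a two-operand target (op and registers illustrative):
         			//   v = ADD x y : AX   // v overwrites x's register
         			// If x is live after v, we first arrange a second copy, e.g.
         			//   c = Copy x : CX
         			// so one register can be clobbered while x survives in the other.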
  1511  			if opcodeTable[v.Op].resultInArg0 {
  1512  				var m regMask
  1513  				if !s.liveAfterCurrentInstruction(v.Args[0]) {
  1514  					// arg0 is dead.  We can clobber its register.
  1515  					goto ok
  1516  				}
  1517  				if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
  1518  					args[0], args[1] = args[1], args[0]
  1519  					goto ok
  1520  				}
  1521  				if s.values[v.Args[0].ID].rematerializeable {
  1522  					// We can rematerialize the input, don't worry about clobbering it.
  1523  					goto ok
  1524  				}
  1525  				if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
  1526  					args[0], args[1] = args[1], args[0]
  1527  					goto ok
  1528  				}
  1529  				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
   1530  					// We have at least 2 copies of arg0, so we can afford to clobber one.
  1531  					goto ok
  1532  				}
  1533  				if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
  1534  					args[0], args[1] = args[1], args[0]
  1535  					goto ok
  1536  				}
  1537  
  1538  				// We can't overwrite arg0 (or arg1, if commutative).  So we
  1539  				// need to make a copy of an input so we have a register we can modify.
  1540  
  1541  				// Possible new registers to copy into.
  1542  				m = s.compatRegs(v.Args[0].Type) &^ s.used
  1543  				if m == 0 {
  1544  					// No free registers.  In this case we'll just clobber
  1545  					// an input and future uses of that input must use a restore.
  1546  					// TODO(khr): We should really do this like allocReg does it,
  1547  					// spilling the value with the most distant next use.
  1548  					goto ok
  1549  				}
  1550  
  1551  				// Try to move an input to the desired output, if allowed.
  1552  				for _, r := range dinfo[idx].out {
  1553  					if r != noRegister && (m&regspec.outputs[0].regs)>>r&1 != 0 {
  1554  						m = regMask(1) << r
  1555  						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
  1556  						// Note: we update args[0] so the instruction will
  1557  						// use the register copy we just made.
  1558  						goto ok
  1559  					}
  1560  				}
  1561  				// Try to copy input to its desired location & use its old
  1562  				// location as the result register.
  1563  				for _, r := range dinfo[idx].in[0] {
  1564  					if r != noRegister && m>>r&1 != 0 {
  1565  						m = regMask(1) << r
  1566  						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1567  						s.copies[c] = false
  1568  						// Note: no update to args[0] so the instruction will
  1569  						// use the original copy.
  1570  						goto ok
  1571  					}
  1572  				}
  1573  				if opcodeTable[v.Op].commutative {
  1574  					for _, r := range dinfo[idx].in[1] {
  1575  						if r != noRegister && m>>r&1 != 0 {
  1576  							m = regMask(1) << r
  1577  							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
  1578  							s.copies[c] = false
  1579  							args[0], args[1] = args[1], args[0]
  1580  							goto ok
  1581  						}
  1582  					}
  1583  				}
  1584  
  1585  				// Avoid future fixed uses if we can.
  1586  				if m&^desired.avoid != 0 {
  1587  					m &^= desired.avoid
  1588  				}
  1589  				// Save input 0 to a new register so we can clobber it.
  1590  				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1591  				s.copies[c] = false
  1592  
  1593  				// Normally we use the register of the old copy of input 0 as the target.
  1594  				// However, if input 0 is already in its desired register then we use
  1595  				// the register of the new copy instead.
  1596  				if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
  1597  					if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
  1598  						r := register(rp.num)
  1599  						for _, r2 := range dinfo[idx].in[0] {
  1600  							if r == r2 {
  1601  								args[0] = c
  1602  								break
  1603  							}
  1604  						}
  1605  					}
  1606  				}
  1607  			}
  1608  
  1609  		ok:
  1610  			// Pick a temporary register if needed.
  1611  			// It should be distinct from all the input registers, so we
  1612  			// allocate it after all the input registers, but before
  1613  			// the input registers are freed via advanceUses below.
   1614  			// (Not every instruction needs this distinctness, but being conservative is safe.)
  1615  			if opcodeTable[v.Op].needIntTemp {
  1616  				m := s.allocatable & s.f.Config.gpRegMask
  1617  				if m&^desired.avoid&^s.nospill != 0 {
  1618  					m &^= desired.avoid
  1619  				}
  1620  				tmpReg = s.allocReg(m, &tmpVal)
  1621  				s.nospill |= regMask(1) << tmpReg
  1622  			}
  1623  
  1624  			// Now that all args are in regs, we're ready to issue the value itself.
  1625  			// Before we pick a register for the output value, allow input registers
  1626  			// to be deallocated. We do this here so that the output can use the
  1627  			// same register as a dying input.
  1628  			if !opcodeTable[v.Op].resultNotInArgs {
  1629  				s.tmpused = s.nospill
  1630  				s.nospill = 0
  1631  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1632  			}
  1633  
  1634  			// Dump any registers which will be clobbered
  1635  			if s.doClobber && v.Op.IsCall() {
  1636  				// clobber registers that are marked as clobber in regmask, but
  1637  				// don't clobber inputs.
  1638  				s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
  1639  			}
  1640  			s.freeRegs(regspec.clobbers)
  1641  			s.tmpused |= regspec.clobbers
  1642  
  1643  			// Pick registers for outputs.
  1644  			{
  1645  				outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
  1646  				maxOutIdx := -1
  1647  				var used regMask
  1648  				if tmpReg != noRegister {
  1649  					// Ensure output registers are distinct from the temporary register.
   1650  					// (Not every instruction needs this distinctness, but being conservative is safe.)
  1651  					used |= regMask(1) << tmpReg
  1652  				}
  1653  				for _, out := range regspec.outputs {
  1654  					mask := out.regs & s.allocatable &^ used
  1655  					if mask == 0 {
  1656  						continue
  1657  					}
  1658  					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
  1659  						if !opcodeTable[v.Op].commutative {
  1660  							// Output must use the same register as input 0.
  1661  							r := register(s.f.getHome(args[0].ID).(*Register).num)
  1662  							if mask>>r&1 == 0 {
  1663  								s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
  1664  							}
  1665  							mask = regMask(1) << r
  1666  						} else {
  1667  							// Output must use the same register as input 0 or 1.
  1668  							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
  1669  							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
  1670  							// Check r0 and r1 for desired output register.
  1671  							found := false
  1672  							for _, r := range dinfo[idx].out {
  1673  								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
  1674  									mask = regMask(1) << r
  1675  									found = true
  1676  									if r == r1 {
  1677  										args[0], args[1] = args[1], args[0]
  1678  									}
  1679  									break
  1680  								}
  1681  							}
  1682  							if !found {
  1683  								// Neither are desired, pick r0.
  1684  								mask = regMask(1) << r0
  1685  							}
  1686  						}
  1687  					}
  1688  					if out.idx == 0 { // desired registers only apply to the first element of a tuple result
  1689  						for _, r := range dinfo[idx].out {
  1690  							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1691  								// Desired register is allowed and unused.
  1692  								mask = regMask(1) << r
  1693  								break
  1694  							}
  1695  						}
  1696  					}
  1697  					// Avoid registers we're saving for other values.
  1698  					if mask&^desired.avoid&^s.nospill&^s.used != 0 {
  1699  						mask &^= desired.avoid
  1700  					}
  1701  					r := s.allocReg(mask, v)
  1702  					if out.idx > maxOutIdx {
  1703  						maxOutIdx = out.idx
  1704  					}
  1705  					outRegs[out.idx] = r
  1706  					used |= regMask(1) << r
  1707  					s.tmpused |= regMask(1) << r
  1708  				}
  1709  				// Record register choices
  1710  				if v.Type.IsTuple() {
  1711  					var outLocs LocPair
  1712  					if r := outRegs[0]; r != noRegister {
  1713  						outLocs[0] = &s.registers[r]
  1714  					}
  1715  					if r := outRegs[1]; r != noRegister {
  1716  						outLocs[1] = &s.registers[r]
  1717  					}
  1718  					s.f.setHome(v, outLocs)
  1719  					// Note that subsequent SelectX instructions will do the assignReg calls.
  1720  				} else if v.Type.IsResults() {
   1721  					// Preallocate outLocs to the right size, which is maxOutIdx+1.
   1722  					outLocs := make(LocResults, maxOutIdx+1)
  1723  					for i := 0; i <= maxOutIdx; i++ {
  1724  						if r := outRegs[i]; r != noRegister {
  1725  							outLocs[i] = &s.registers[r]
  1726  						}
  1727  					}
  1728  					s.f.setHome(v, outLocs)
  1729  				} else {
  1730  					if r := outRegs[0]; r != noRegister {
  1731  						s.assignReg(r, v, v)
  1732  					}
  1733  				}
  1734  				if tmpReg != noRegister {
  1735  					// Remember the temp register allocation, if any.
  1736  					if s.f.tempRegs == nil {
  1737  						s.f.tempRegs = map[ID]*Register{}
  1738  					}
  1739  					s.f.tempRegs[v.ID] = &s.registers[tmpReg]
  1740  				}
  1741  			}
  1742  
   1743  			// Deallocate dead args, if we have not already done so.
  1744  			if opcodeTable[v.Op].resultNotInArgs {
  1745  				s.nospill = 0
  1746  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1747  			}
  1748  			s.tmpused = 0
  1749  
  1750  			// Issue the Value itself.
  1751  			for i, a := range args {
  1752  				v.SetArg(i, a) // use register version of arguments
  1753  			}
  1754  			b.Values = append(b.Values, v)
  1755  		}
  1756  
  1757  		// Copy the control values - we need this so we can reduce the
  1758  		// uses property of these values later.
  1759  		controls := append(make([]*Value, 0, 2), b.ControlValues()...)
  1760  
  1761  		// Load control values into registers.
  1762  		for i, v := range b.ControlValues() {
  1763  			if !s.values[v.ID].needReg {
  1764  				continue
  1765  			}
  1766  			if s.f.pass.debug > regDebug {
  1767  				fmt.Printf("  processing control %s\n", v.LongString())
  1768  			}
  1769  			// We assume that a control input can be passed in any
  1770  			// type-compatible register. If this turns out not to be true,
  1771  			// we'll need to introduce a regspec for a block's control value.
  1772  			b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
  1773  		}
  1774  
  1775  		// Reduce the uses of the control values once registers have been loaded.
  1776  		// This loop is equivalent to the advanceUses method.
  1777  		for _, v := range controls {
  1778  			vi := &s.values[v.ID]
  1779  			if !vi.needReg {
  1780  				continue
  1781  			}
  1782  			// Remove this use from the uses list.
  1783  			u := vi.uses
  1784  			vi.uses = u.next
  1785  			if u.next == nil {
  1786  				s.freeRegs(vi.regs) // value is dead
  1787  			}
  1788  			u.next = s.freeUseRecords
  1789  			s.freeUseRecords = u
  1790  		}
  1791  
  1792  		// If we are approaching a merge point and we are the primary
  1793  		// predecessor of it, find live values that we use soon after
  1794  		// the merge point and promote them to registers now.
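         		// Sketch: if b's single successor is the header of a call-free
         		// loop, a live value that currently exists only on the stack is
         		// preloaded into a register here, so the loop body need not
         		// reload it on every iteration.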
  1795  		if len(b.Succs) == 1 {
  1796  			if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
  1797  				s.freeReg(s.GReg) // Spill value in G register before any merge.
  1798  			}
  1799  			// For this to be worthwhile, the loop must have no calls in it.
  1800  			top := b.Succs[0].b
  1801  			loop := s.loopnest.b2l[top.ID]
  1802  			if loop == nil || loop.header != top || loop.containsUnavoidableCall {
  1803  				goto badloop
  1804  			}
  1805  
  1806  			// TODO: sort by distance, pick the closest ones?
  1807  			for _, live := range s.live[b.ID] {
  1808  				if live.dist >= unlikelyDistance {
  1809  					// Don't preload anything live after the loop.
  1810  					continue
  1811  				}
  1812  				vid := live.ID
  1813  				vi := &s.values[vid]
  1814  				if vi.regs != 0 {
  1815  					continue
  1816  				}
  1817  				if vi.rematerializeable {
  1818  					continue
  1819  				}
  1820  				v := s.orig[vid]
  1821  				m := s.compatRegs(v.Type) &^ s.used
   1822  				// Use desired register if available.
  1823  			outerloop:
  1824  				for _, e := range desired.entries {
  1825  					if e.ID != v.ID {
  1826  						continue
  1827  					}
  1828  					for _, r := range e.regs {
  1829  						if r != noRegister && m>>r&1 != 0 {
  1830  							m = regMask(1) << r
  1831  							break outerloop
  1832  						}
  1833  					}
  1834  				}
  1835  				if m&^desired.avoid != 0 {
  1836  					m &^= desired.avoid
  1837  				}
  1838  				if m != 0 {
  1839  					s.allocValToReg(v, m, false, b.Pos)
  1840  				}
  1841  			}
  1842  		}
  1843  	badloop:
  1844  		;
  1845  
  1846  		// Save end-of-block register state.
  1847  		// First count how many, this cuts allocations in half.
  1848  		k := 0
  1849  		for r := register(0); r < s.numRegs; r++ {
  1850  			v := s.regs[r].v
  1851  			if v == nil {
  1852  				continue
  1853  			}
  1854  			k++
  1855  		}
  1856  		regList := make([]endReg, 0, k)
  1857  		for r := register(0); r < s.numRegs; r++ {
  1858  			v := s.regs[r].v
  1859  			if v == nil {
  1860  				continue
  1861  			}
  1862  			regList = append(regList, endReg{r, v, s.regs[r].c})
  1863  		}
  1864  		s.endRegs[b.ID] = regList
  1865  
  1866  		if checkEnabled {
  1867  			regValLiveSet.clear()
  1868  			for _, x := range s.live[b.ID] {
  1869  				regValLiveSet.add(x.ID)
  1870  			}
  1871  			for r := register(0); r < s.numRegs; r++ {
  1872  				v := s.regs[r].v
  1873  				if v == nil {
  1874  					continue
  1875  				}
  1876  				if !regValLiveSet.contains(v.ID) {
  1877  					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
  1878  				}
  1879  			}
  1880  		}
  1881  
  1882  		// If a value is live at the end of the block and
  1883  		// isn't in a register, generate a use for the spill location.
  1884  		// We need to remember this information so that
  1885  		// the liveness analysis in stackalloc is correct.
  1886  		for _, e := range s.live[b.ID] {
  1887  			vi := &s.values[e.ID]
  1888  			if vi.regs != 0 {
  1889  				// in a register, we'll use that source for the merge.
  1890  				continue
  1891  			}
  1892  			if vi.rematerializeable {
  1893  				// we'll rematerialize during the merge.
  1894  				continue
  1895  			}
  1896  			if s.f.pass.debug > regDebug {
  1897  				fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
  1898  			}
  1899  			spill := s.makeSpill(s.orig[e.ID], b)
  1900  			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
  1901  		}
  1902  
  1903  		// Clear any final uses.
  1904  		// All that is left should be the pseudo-uses added for values which
  1905  		// are live at the end of b.
  1906  		for _, e := range s.live[b.ID] {
  1907  			u := s.values[e.ID].uses
  1908  			if u == nil {
  1909  				f.Fatalf("live at end, no uses v%d", e.ID)
  1910  			}
  1911  			if u.next != nil {
  1912  				f.Fatalf("live at end, too many uses v%d", e.ID)
  1913  			}
  1914  			s.values[e.ID].uses = nil
  1915  			u.next = s.freeUseRecords
  1916  			s.freeUseRecords = u
  1917  		}
  1918  
  1919  		// allocReg may have dropped registers from startRegsMask that
  1920  		// aren't actually needed in startRegs. Synchronize back to
  1921  		// startRegs.
  1922  		//
  1923  		// This must be done before placing spills, which will look at
  1924  		// startRegs to decide if a block is a valid block for a spill.
  1925  		if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
  1926  			regs := make([]startReg, 0, c)
  1927  			for _, sr := range s.startRegs[b.ID] {
  1928  				if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
  1929  					continue
  1930  				}
  1931  				regs = append(regs, sr)
  1932  			}
  1933  			s.startRegs[b.ID] = regs
  1934  		}
  1935  	}
  1936  
  1937  	// Decide where the spills we generated will go.
  1938  	s.placeSpills()
  1939  
  1940  	// Anything that didn't get a register gets a stack location here.
  1941  	// (StoreReg, stack-based phis, inputs, ...)
  1942  	stacklive := stackalloc(s.f, s.spillLive)
  1943  
  1944  	// Fix up all merge edges.
  1945  	s.shuffle(stacklive)
  1946  
  1947  	// Erase any copies we never used.
  1948  	// Also, an unused copy might be the only use of another copy,
  1949  	// so continue erasing until we reach a fixed point.
  1950  	for {
  1951  		progress := false
  1952  		for c, used := range s.copies {
  1953  			if !used && c.Uses == 0 {
  1954  				if s.f.pass.debug > regDebug {
  1955  					fmt.Printf("delete copied value %s\n", c.LongString())
  1956  				}
  1957  				c.resetArgs()
  1958  				f.freeValue(c)
  1959  				delete(s.copies, c)
  1960  				progress = true
  1961  			}
  1962  		}
  1963  		if !progress {
  1964  			break
  1965  		}
  1966  	}
  1967  
  1968  	for _, b := range s.visitOrder {
  1969  		i := 0
  1970  		for _, v := range b.Values {
  1971  			if v.Op == OpInvalid {
  1972  				continue
  1973  			}
  1974  			b.Values[i] = v
  1975  			i++
  1976  		}
  1977  		b.Values = b.Values[:i]
  1978  	}
  1979  }
  1980  
  1981  func (s *regAllocState) placeSpills() {
  1982  	mustBeFirst := func(op Op) bool {
  1983  		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
  1984  	}
  1985  
  1986  	// Start maps block IDs to the list of spills
  1987  	// that go at the start of the block (but after any phis).
  1988  	start := map[ID][]*Value{}
  1989  	// After maps value IDs to the list of spills
  1990  	// that go immediately after that value ID.
  1991  	after := map[ID][]*Value{}
  1992  
  1993  	for i := range s.values {
  1994  		vi := s.values[i]
  1995  		spill := vi.spill
  1996  		if spill == nil {
  1997  			continue
  1998  		}
  1999  		if spill.Block != nil {
  2000  			// Some spills are already fully set up,
  2001  			// like OpArgs and stack-based phis.
  2002  			continue
  2003  		}
  2004  		v := s.orig[i]
  2005  
  2006  		// Walk down the dominator tree looking for a good place to
  2007  		// put the spill of v.  At the start "best" is the best place
  2008  		// we have found so far.
  2009  		// TODO: find a way to make this O(1) without arbitrary cutoffs.
  2010  		if v == nil {
  2011  			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
  2012  		}
  2013  		best := v.Block
  2014  		bestArg := v
  2015  		var bestDepth int16
  2016  		if l := s.loopnest.b2l[best.ID]; l != nil {
  2017  			bestDepth = l.depth
  2018  		}
  2019  		b := best
  2020  		const maxSpillSearch = 100
  2021  		for i := 0; i < maxSpillSearch; i++ {
  2022  			// Find the child of b in the dominator tree which
  2023  			// dominates all restores.
  2024  			p := b
  2025  			b = nil
  2026  			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
  2027  				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
  2028  					// c also dominates all restores.  Walk down into c.
  2029  					b = c
  2030  					break
  2031  				}
  2032  			}
  2033  			if b == nil {
  2034  				// Ran out of blocks which dominate all restores.
  2035  				break
  2036  			}
  2037  
  2038  			var depth int16
  2039  			if l := s.loopnest.b2l[b.ID]; l != nil {
  2040  				depth = l.depth
  2041  			}
  2042  			if depth > bestDepth {
  2043  				// Don't push the spill into a deeper loop.
  2044  				continue
  2045  			}
  2046  
  2047  			// If v is in a register at the start of b, we can
  2048  			// place the spill here (after the phis).
  2049  			if len(b.Preds) == 1 {
  2050  				for _, e := range s.endRegs[b.Preds[0].b.ID] {
  2051  					if e.v == v {
  2052  						// Found a better spot for the spill.
  2053  						best = b
  2054  						bestArg = e.c
  2055  						bestDepth = depth
  2056  						break
  2057  					}
  2058  				}
  2059  			} else {
  2060  				for _, e := range s.startRegs[b.ID] {
  2061  					if e.v == v {
  2062  						// Found a better spot for the spill.
  2063  						best = b
  2064  						bestArg = e.c
  2065  						bestDepth = depth
  2066  						break
  2067  					}
  2068  				}
  2069  			}
  2070  		}
  2071  
  2072  		// Put the spill in the best block we found.
  2073  		spill.Block = best
  2074  		spill.AddArg(bestArg)
  2075  		if best == v.Block && !mustBeFirst(v.Op) {
  2076  			// Place immediately after v.
  2077  			after[v.ID] = append(after[v.ID], spill)
  2078  		} else {
  2079  			// Place at the start of best block.
  2080  			start[best.ID] = append(start[best.ID], spill)
  2081  		}
  2082  	}
  2083  
  2084  	// Insert spill instructions into the block schedules.
  2085  	var oldSched []*Value
  2086  	for _, b := range s.visitOrder {
  2087  		nfirst := 0
  2088  		for _, v := range b.Values {
  2089  			if !mustBeFirst(v.Op) {
  2090  				break
  2091  			}
  2092  			nfirst++
  2093  		}
  2094  		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
  2095  		b.Values = b.Values[:nfirst]
  2096  		b.Values = append(b.Values, start[b.ID]...)
  2097  		for _, v := range oldSched {
  2098  			b.Values = append(b.Values, v)
  2099  			b.Values = append(b.Values, after[v.ID]...)
  2100  		}
  2101  	}
  2102  }
  2103  
  2104  // shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
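         // For example (a sketch with illustrative registers): if a phi in b expects
         // its argument from predecessor p in AX, but p ends with that value in BX,
         // shuffle emits a copy from BX to AX at the end of p.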
  2105  func (s *regAllocState) shuffle(stacklive [][]ID) {
  2106  	var e edgeState
  2107  	e.s = s
  2108  	e.cache = map[ID][]*Value{}
  2109  	e.contents = map[Location]contentRecord{}
  2110  	if s.f.pass.debug > regDebug {
  2111  		fmt.Printf("shuffle %s\n", s.f.Name)
  2112  		fmt.Println(s.f.String())
  2113  	}
  2114  
  2115  	for _, b := range s.visitOrder {
  2116  		if len(b.Preds) <= 1 {
  2117  			continue
  2118  		}
  2119  		e.b = b
  2120  		for i, edge := range b.Preds {
  2121  			p := edge.b
  2122  			e.p = p
  2123  			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
  2124  			e.process()
  2125  		}
  2126  	}
  2127  
  2128  	if s.f.pass.debug > regDebug {
  2129  		fmt.Printf("post shuffle %s\n", s.f.Name)
  2130  		fmt.Println(s.f.String())
  2131  	}
  2132  }
  2133  
  2134  type edgeState struct {
  2135  	s    *regAllocState
  2136  	p, b *Block // edge goes from p->b.
  2137  
  2138  	// for each pre-regalloc value, a list of equivalent cached values
  2139  	cache      map[ID][]*Value
  2140  	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration
  2141  
  2142  	// map from location to the value it contains
  2143  	contents map[Location]contentRecord
  2144  
  2145  	// desired destination locations
  2146  	destinations []dstRecord
  2147  	extra        []dstRecord
  2148  
  2149  	usedRegs              regMask // registers currently holding something
  2150  	uniqueRegs            regMask // registers holding the only copy of a value
  2151  	finalRegs             regMask // registers holding final target
  2152  	rematerializeableRegs regMask // registers that hold rematerializeable values
  2153  }
  2154  
  2155  type contentRecord struct {
  2156  	vid   ID       // pre-regalloc value
  2157  	c     *Value   // cached value
  2158  	final bool     // this is a satisfied destination
  2159  	pos   src.XPos // source position of use of the value
  2160  }
  2161  
  2162  type dstRecord struct {
  2163  	loc    Location // register or stack slot
  2164  	vid    ID       // pre-regalloc value it should contain
  2165  	splice **Value  // place to store reference to the generating instruction
  2166  	pos    src.XPos // source position of use of this location
  2167  }
  2168  
  2169  // setup initializes the edge state for shuffling.
  2170  func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
  2171  	if e.s.f.pass.debug > regDebug {
  2172  		fmt.Printf("edge %s->%s\n", e.p, e.b)
  2173  	}
  2174  
  2175  	// Clear state.
  2176  	for _, vid := range e.cachedVals {
  2177  		delete(e.cache, vid)
  2178  	}
  2179  	e.cachedVals = e.cachedVals[:0]
  2180  	for k := range e.contents {
  2181  		delete(e.contents, k)
  2182  	}
  2183  	e.usedRegs = 0
  2184  	e.uniqueRegs = 0
  2185  	e.finalRegs = 0
  2186  	e.rematerializeableRegs = 0
  2187  
  2188  	// Live registers can be sources.
  2189  	for _, x := range srcReg {
   2190  		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // we don't care about the position of the source
  2191  	}
  2192  	// So can all of the spill locations.
  2193  	for _, spillID := range stacklive {
  2194  		v := e.s.orig[spillID]
  2195  		spill := e.s.values[v.ID].spill
  2196  		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
  2197  			// Spills were placed that only dominate the uses found
  2198  			// during the first regalloc pass. The edge fixup code
  2199  			// can't use a spill location if the spill doesn't dominate
  2200  			// the edge.
  2201  			// We are guaranteed that if the spill doesn't dominate this edge,
  2202  			// then the value is available in a register (because we called
  2203  			// makeSpill for every value not in a register at the start
  2204  			// of an edge).
  2205  			continue
  2206  		}
   2207  		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // we don't care about the position of the source
  2208  	}
  2209  
  2210  	// Figure out all the destinations we need.
  2211  	dsts := e.destinations[:0]
  2212  	for _, x := range dstReg {
  2213  		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
  2214  	}
  2215  	// Phis need their args to end up in a specific location.
  2216  	for _, v := range e.b.Values {
  2217  		if v.Op != OpPhi {
  2218  			break
  2219  		}
  2220  		loc := e.s.f.getHome(v.ID)
  2221  		if loc == nil {
  2222  			continue
  2223  		}
  2224  		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
  2225  	}
  2226  	e.destinations = dsts
  2227  
  2228  	if e.s.f.pass.debug > regDebug {
  2229  		for _, vid := range e.cachedVals {
  2230  			a := e.cache[vid]
  2231  			for _, c := range a {
  2232  				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
  2233  			}
  2234  		}
  2235  		for _, d := range e.destinations {
  2236  			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
  2237  		}
  2238  	}
  2239  }
  2240  
  2241  // process generates code to move all the values to the right destination locations.
  2242  func (e *edgeState) process() {
  2243  	dsts := e.destinations
  2244  
  2245  	// Process the destinations until they are all satisfied.
  2246  	for len(dsts) > 0 {
  2247  		i := 0
  2248  		for _, d := range dsts {
  2249  			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
  2250  				// Failed - save for next iteration.
  2251  				dsts[i] = d
  2252  				i++
  2253  			}
  2254  		}
  2255  		if i < len(dsts) {
  2256  			// Made some progress. Go around again.
  2257  			dsts = dsts[:i]
  2258  
   2259  			// Append any extra destinations we generated.
  2260  			dsts = append(dsts, e.extra...)
  2261  			e.extra = e.extra[:0]
  2262  			continue
  2263  		}
  2264  
  2265  		// We made no progress. That means that any
  2266  		// remaining unsatisfied moves are in simple cycles.
  2267  		// For example, A -> B -> C -> D -> A.
  2268  		//   A ----> B
  2269  		//   ^       |
  2270  		//   |       |
  2271  		//   |       v
  2272  		//   D <---- C
  2273  
  2274  		// To break the cycle, we pick an unused register, say R,
  2275  		// and put a copy of B there.
  2276  		//   A ----> B
  2277  		//   ^       |
  2278  		//   |       |
  2279  		//   |       v
  2280  		//   D <---- C <---- R=copyofB
  2281  		// When we resume the outer loop, the A->B move can now proceed,
  2282  		// and eventually the whole cycle completes.
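         		// A minimal two-element instance (registers illustrative): to swap
         		// AX and BX with CX free, we emit
         		//   CX = Copy BX
         		//   BX = Copy AX
         		//   AX = Copy CX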
  2283  
  2284  		// Copy any cycle location to a temp register. This duplicates
  2285  		// one of the cycle entries, allowing the just duplicated value
  2286  		// to be overwritten and the cycle to proceed.
  2287  		d := dsts[0]
  2288  		loc := d.loc
  2289  		vid := e.contents[loc].vid
  2290  		c := e.contents[loc].c
  2291  		r := e.findRegFor(c.Type)
  2292  		if e.s.f.pass.debug > regDebug {
  2293  			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
  2294  		}
  2295  		e.erase(r)
  2296  		pos := d.pos.WithNotStmt()
  2297  		if _, isReg := loc.(*Register); isReg {
  2298  			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2299  		} else {
  2300  			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2301  		}
  2302  		e.set(r, vid, c, false, pos)
  2303  		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
  2304  			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
  2305  		}
  2306  	}
  2307  }
  2308  
  2309  // processDest generates code to put value vid into location loc. Returns true
  2310  // if progress was made.
  2311  func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
  2312  	pos = pos.WithNotStmt()
  2313  	occupant := e.contents[loc]
  2314  	if occupant.vid == vid {
  2315  		// Value is already in the correct place.
  2316  		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
  2317  		if splice != nil {
  2318  			(*splice).Uses--
  2319  			*splice = occupant.c
  2320  			occupant.c.Uses++
  2321  		}
   2322  		// Note: if splice==nil then c will appear dead. The code is no
   2323  		// longer in SSA form after this pass, so be careful not to run
   2324  		// deadcode elimination on it.
  2325  		if _, ok := e.s.copies[occupant.c]; ok {
  2326  			// The copy at occupant.c was used to avoid spill.
  2327  			e.s.copies[occupant.c] = true
  2328  		}
  2329  		return true
  2330  	}
  2331  
  2332  	// Check if we're allowed to clobber the destination location.
  2333  	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
  2334  		// We can't overwrite the last copy
  2335  		// of a value that needs to survive.
  2336  		return false
  2337  	}
  2338  
  2339  	// Copy from a source of v, register preferred.
  2340  	v := e.s.orig[vid]
  2341  	var c *Value
  2342  	var src Location
  2343  	if e.s.f.pass.debug > regDebug {
  2344  		fmt.Printf("moving v%d to %s\n", vid, loc)
  2345  		fmt.Printf("sources of v%d:", vid)
  2346  	}
  2347  	for _, w := range e.cache[vid] {
  2348  		h := e.s.f.getHome(w.ID)
  2349  		if e.s.f.pass.debug > regDebug {
  2350  			fmt.Printf(" %s:%s", h, w)
  2351  		}
  2352  		_, isreg := h.(*Register)
  2353  		if src == nil || isreg {
  2354  			c = w
  2355  			src = h
  2356  		}
  2357  	}
  2358  	if e.s.f.pass.debug > regDebug {
  2359  		if src != nil {
  2360  			fmt.Printf(" [use %s]\n", src)
  2361  		} else {
  2362  			fmt.Printf(" [no source]\n")
  2363  		}
  2364  	}
  2365  	_, dstReg := loc.(*Register)
  2366  
  2367  	// Pre-clobber destination. This avoids the
  2368  	// following situation:
  2369  	//   - v is currently held in R0 and stacktmp0.
  2370  	//   - We want to copy stacktmp1 to stacktmp0.
  2371  	//   - We choose R0 as the temporary register.
  2372  	// During the copy, both R0 and stacktmp0 are
  2373  	// clobbered, losing both copies of v. Oops!
  2374  	// Erasing the destination early means R0 will not
  2375  	// be chosen as the temp register, as it will then
  2376  	// be the last copy of v.
  2377  	e.erase(loc)
  2378  	var x *Value
  2379  	if c == nil || e.s.values[vid].rematerializeable {
  2380  		if !e.s.values[vid].rematerializeable {
  2381  			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
  2382  		}
  2383  		if dstReg {
  2384  			x = v.copyInto(e.p)
  2385  		} else {
  2386  			// Rematerialize into stack slot. Need a free
  2387  			// register to accomplish this.
  2388  			r := e.findRegFor(v.Type)
  2389  			e.erase(r)
  2390  			x = v.copyIntoWithXPos(e.p, pos)
  2391  			e.set(r, vid, x, false, pos)
  2392  			// Make sure we spill with the size of the slot, not the
  2393  			// size of x (which might be wider due to our dropping
  2394  			// of narrowing conversions).
  2395  			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
  2396  		}
  2397  	} else {
  2398  		// Emit move from src to dst.
  2399  		_, srcReg := src.(*Register)
  2400  		if srcReg {
  2401  			if dstReg {
  2402  				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2403  			} else {
  2404  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
  2405  			}
  2406  		} else {
  2407  			if dstReg {
  2408  				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2409  			} else {
  2410  				// mem->mem. Use temp register.
  2411  				r := e.findRegFor(c.Type)
  2412  				e.erase(r)
  2413  				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2414  				e.set(r, vid, t, false, pos)
  2415  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
  2416  			}
  2417  		}
  2418  	}
  2419  	e.set(loc, vid, x, true, pos)
  2420  	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
  2421  		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
  2422  	}
  2423  	if splice != nil {
  2424  		(*splice).Uses--
  2425  		*splice = x
  2426  		x.Uses++
  2427  	}
  2428  	return true
  2429  }
  2430  
  2431  // set changes the contents of location loc to hold the given value and its cached representative.
  2432  func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
  2433  	e.s.f.setHome(c, loc)
  2434  	e.contents[loc] = contentRecord{vid, c, final, pos}
  2435  	a := e.cache[vid]
  2436  	if len(a) == 0 {
  2437  		e.cachedVals = append(e.cachedVals, vid)
  2438  	}
  2439  	a = append(a, c)
  2440  	e.cache[vid] = a
  2441  	if r, ok := loc.(*Register); ok {
  2442  		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
  2443  			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
  2444  		}
  2445  		e.usedRegs |= regMask(1) << uint(r.num)
  2446  		if final {
  2447  			e.finalRegs |= regMask(1) << uint(r.num)
  2448  		}
  2449  		if len(a) == 1 {
  2450  			e.uniqueRegs |= regMask(1) << uint(r.num)
  2451  		}
  2452  		if len(a) == 2 {
  2453  			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2454  				e.uniqueRegs &^= regMask(1) << uint(t.num)
  2455  			}
  2456  		}
  2457  		if e.s.values[vid].rematerializeable {
  2458  			e.rematerializeableRegs |= regMask(1) << uint(r.num)
  2459  		}
  2460  	}
  2461  	if e.s.f.pass.debug > regDebug {
  2462  		fmt.Printf("%s\n", c.LongString())
  2463  		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
  2464  	}
  2465  }
  2466  
  2467  // erase removes any user of loc.
  2468  func (e *edgeState) erase(loc Location) {
  2469  	cr := e.contents[loc]
  2470  	if cr.c == nil {
  2471  		return
  2472  	}
  2473  	vid := cr.vid
  2474  
  2475  	if cr.final {
  2476  		// Add a destination to move this value back into place.
  2477  		// Make sure it gets added to the tail of the destination queue
  2478  		// so we make progress on other moves first.
  2479  		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
  2480  	}
  2481  
  2482  	// Remove c from the list of cached values.
  2483  	a := e.cache[vid]
  2484  	for i, c := range a {
  2485  		if e.s.f.getHome(c.ID) == loc {
  2486  			if e.s.f.pass.debug > regDebug {
  2487  				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
  2488  			}
  2489  			a[i], a = a[len(a)-1], a[:len(a)-1]
  2490  			break
  2491  		}
  2492  	}
  2493  	e.cache[vid] = a
  2494  
  2495  	// Update register masks.
  2496  	if r, ok := loc.(*Register); ok {
  2497  		e.usedRegs &^= regMask(1) << uint(r.num)
  2498  		if cr.final {
  2499  			e.finalRegs &^= regMask(1) << uint(r.num)
  2500  		}
  2501  		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
  2502  	}
  2503  	if len(a) == 1 {
  2504  		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2505  			e.uniqueRegs |= regMask(1) << uint(r.num)
  2506  		}
  2507  	}
  2508  }
  2509  
  2510  // findRegFor finds a register we can use to make a temp copy of type typ.
  2511  func (e *edgeState) findRegFor(typ *types.Type) Location {
  2512  	// Which registers are possibilities.
  2513  	types := &e.s.f.Config.Types
  2514  	m := e.s.compatRegs(typ)
  2515  
  2516  	// Pick a register. In priority order:
  2517  	// 1) an unused register
  2518  	// 2) a non-unique register not holding a final value
  2519  	// 3) a non-unique register
  2520  	// 4) a register holding a rematerializeable value
  2521  	x := m &^ e.usedRegs
  2522  	if x != 0 {
  2523  		return &e.s.registers[pickReg(x)]
  2524  	}
  2525  	x = m &^ e.uniqueRegs &^ e.finalRegs
  2526  	if x != 0 {
  2527  		return &e.s.registers[pickReg(x)]
  2528  	}
  2529  	x = m &^ e.uniqueRegs
  2530  	if x != 0 {
  2531  		return &e.s.registers[pickReg(x)]
  2532  	}
  2533  	x = m & e.rematerializeableRegs
  2534  	if x != 0 {
  2535  		return &e.s.registers[pickReg(x)]
  2536  	}
  2537  
  2538  	// No register is available.
  2539  	// Pick a register to spill.
  2540  	for _, vid := range e.cachedVals {
  2541  		a := e.cache[vid]
  2542  		for _, c := range a {
  2543  			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
  2544  				if !c.rematerializeable() {
  2545  					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
  2546  					// Allocate a temp location to spill a register to.
  2547  					// The type of the slot is immaterial - it will not be live across
  2548  					// any safepoint. Just use a type big enough to hold any register.
  2549  					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, types.Int64), Type: types.Int64}
  2550  					// TODO: reuse these slots. They'll need to be erased first.
  2551  					e.set(t, vid, x, false, c.Pos)
  2552  					if e.s.f.pass.debug > regDebug {
  2553  						fmt.Printf("  SPILL %s->%s %s\n", r, t, x.LongString())
  2554  					}
  2555  				}
  2556  				// r will now be overwritten by the caller. At some point
  2557  				// later, the newly saved value will be moved back to its
  2558  				// final destination in processDest.
  2559  				return r
  2560  			}
  2561  		}
  2562  	}
  2563  
   2564  	fmt.Printf("m:%d unique:%d final:%d rematerializeable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
  2565  	for _, vid := range e.cachedVals {
  2566  		a := e.cache[vid]
  2567  		for _, c := range a {
  2568  			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
  2569  		}
  2570  	}
  2571  	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
  2572  	return nil
  2573  }
  2574  
  2575  // rematerializeable reports whether the register allocator should recompute
  2576  // a value instead of spilling/restoring it.
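         // For example (a sketch): a constant-producing op with no arguments, or
         // an address computed only from SP/SB, is cheaper to re-issue at each
         // use than to spill and reload from the stack.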
  2577  func (v *Value) rematerializeable() bool {
  2578  	if !opcodeTable[v.Op].rematerializeable {
  2579  		return false
  2580  	}
  2581  	for _, a := range v.Args {
  2582  		// SP and SB (generated by OpSP and OpSB) are always available.
  2583  		if a.Op != OpSP && a.Op != OpSB {
  2584  			return false
  2585  		}
  2586  	}
  2587  	return true
  2588  }
  2589  
  2590  type liveInfo struct {
  2591  	ID   ID       // ID of value
  2592  	dist int32    // # of instructions before next use
  2593  	pos  src.XPos // source position of next use
  2594  }
  2595  
  2596  // computeLive computes a map from block ID to a list of value IDs live at the end
  2597  // of that block. Together with the value ID is a count of how many instructions
  2598  // to the next use of that value. The resulting map is stored in s.live.
  2599  // computeLive also computes the desired register information at the end of each block.
  2600  // This desired register information is stored in s.desired.
  2601  // TODO: this could be quadratic if lots of variables are live across lots of
  2602  // basic blocks. Figure out a way to make this function (or, more precisely, the user
  2603  // of this function) require only linear size & time.
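         // Schematic example: if v's next use is the first instruction of b's
         // likely successor, v appears in s.live[b.ID] with dist == likelyDistance;
         // an intervening call adds unlikelyDistance, deprioritizing keeping v
         // in a register across the call.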
  2604  func (s *regAllocState) computeLive() {
  2605  	f := s.f
  2606  	s.live = make([][]liveInfo, f.NumBlocks())
  2607  	s.desired = make([]desiredState, f.NumBlocks())
  2608  	var phis []*Value
  2609  
  2610  	live := f.newSparseMapPos(f.NumValues())
  2611  	defer f.retSparseMapPos(live)
  2612  	t := f.newSparseMapPos(f.NumValues())
  2613  	defer f.retSparseMapPos(t)
  2614  
  2615  	// Keep track of which value we want in each register.
  2616  	var desired desiredState
  2617  
  2618  	// Instead of iterating over f.Blocks, iterate over their postordering.
  2619  	// Liveness information flows backward, so starting at the end
  2620  	// increases the probability that we will stabilize quickly.
  2621  	// TODO: Do a better job yet. Here's one possibility:
  2622  	// Calculate the dominator tree and locate all strongly connected components.
  2623  	// If a value is live in one block of an SCC, it is live in all.
  2624  	// Walk the dominator tree from end to beginning, just once, treating SCC
   2625  	// components as single blocks, duplicating calculated liveness information
  2626  	// out to all of them.
  2627  	po := f.postorder()
  2628  	s.loopnest = f.loopnest()
  2629  	s.loopnest.calculateDepths()
  2630  	for {
  2631  		changed := false
  2632  
  2633  		for _, b := range po {
  2634  			// Start with known live values at the end of the block.
  2635  			// Add len(b.Values) to adjust from end-of-block distance
  2636  			// to beginning-of-block distance.
  2637  			live.clear()
  2638  			for _, e := range s.live[b.ID] {
  2639  				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
  2640  			}
  2641  
  2642  			// Mark control values as live
  2643  			for _, c := range b.ControlValues() {
  2644  				if s.values[c.ID].needReg {
  2645  					live.set(c.ID, int32(len(b.Values)), b.Pos)
  2646  				}
  2647  			}
  2648  
  2649  			// Propagate backwards to the start of the block
  2650  			// Assumes Values have been scheduled.
  2651  			phis = phis[:0]
  2652  			for i := len(b.Values) - 1; i >= 0; i-- {
  2653  				v := b.Values[i]
  2654  				live.remove(v.ID)
  2655  				if v.Op == OpPhi {
  2656  					// save phi ops for later
  2657  					phis = append(phis, v)
  2658  					continue
  2659  				}
  2660  				if opcodeTable[v.Op].call {
  2661  					c := live.contents()
  2662  					for i := range c {
  2663  						c[i].val += unlikelyDistance
  2664  					}
  2665  				}
  2666  				for _, a := range v.Args {
  2667  					if s.values[a.ID].needReg {
  2668  						live.set(a.ID, int32(i), v.Pos)
  2669  					}
  2670  				}
  2671  			}
  2672  			// Propagate desired registers backwards.
  2673  			desired.copy(&s.desired[b.ID])
  2674  			for i := len(b.Values) - 1; i >= 0; i-- {
  2675  				v := b.Values[i]
  2676  				prefs := desired.remove(v.ID)
  2677  				if v.Op == OpPhi {
  2678  					// TODO: if v is a phi, save desired register for phi inputs.
  2679  					// For now, we just drop it and don't propagate
   2680  					// desired registers back through phi nodes.
  2681  					continue
  2682  				}
  2683  				regspec := s.regspec(v)
  2684  				// Cancel desired registers if they get clobbered.
  2685  				desired.clobber(regspec.clobbers)
  2686  				// Update desired registers if there are any fixed register inputs.
  2687  				for _, j := range regspec.inputs {
  2688  					if countRegs(j.regs) != 1 {
  2689  						continue
  2690  					}
  2691  					desired.clobber(j.regs)
  2692  					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  2693  				}
  2694  				// Set desired register of input 0 if this is a 2-operand instruction.
  2695  				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  2696  					// ADDQconst is added here because we want to treat it as resultInArg0 for
  2697  					// the purposes of desired registers, even though it is not an absolute requirement.
  2698  					// This is because we'd rather implement it as ADDQ instead of LEAQ.
   2699  					// The same goes for ADDLconst.
  2700  					// Select0 is added here to propagate the desired register to the tuple-generating instruction.
  2701  					if opcodeTable[v.Op].commutative {
  2702  						desired.addList(v.Args[1].ID, prefs)
  2703  					}
  2704  					desired.addList(v.Args[0].ID, prefs)
  2705  				}
  2706  			}
  2707  
  2708  			// For each predecessor of b, expand its list of live-at-end values.
  2709  			// invariant: live contains the values live at the start of b (excluding phi inputs)
  2710  			for i, e := range b.Preds {
  2711  				p := e.b
  2712  				// Compute additional distance for the edge.
  2713  				// Note: delta must be at least 1 to distinguish the control
  2714  				// value use from the first user in a successor block.
  2715  				delta := int32(normalDistance)
  2716  				if len(p.Succs) == 2 {
  2717  					if p.Succs[0].b == b && p.Likely == BranchLikely ||
  2718  						p.Succs[1].b == b && p.Likely == BranchUnlikely {
  2719  						delta = likelyDistance
  2720  					}
  2721  					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
  2722  						p.Succs[1].b == b && p.Likely == BranchLikely {
  2723  						delta = unlikelyDistance
  2724  					}
  2725  				}
  2726  
  2727  				// Update any desired registers at the end of p.
  2728  				s.desired[p.ID].merge(&desired)
  2729  
  2730  				// Start t off with the previously known live values at the end of p.
  2731  				t.clear()
  2732  				for _, e := range s.live[p.ID] {
  2733  					t.set(e.ID, e.dist, e.pos)
  2734  				}
  2735  				update := false
  2736  
  2737  				// Add new live values from scanning this block.
  2738  				for _, e := range live.contents() {
  2739  					d := e.val + delta
  2740  					if !t.contains(e.key) || d < t.get(e.key) {
  2741  						update = true
  2742  						t.set(e.key, d, e.pos)
  2743  					}
  2744  				}
  2745  				// Also add the correct arg from the saved phi values.
  2746  				// All phis are at distance delta (we consider them
  2747  				// simultaneously happening at the start of the block).
  2748  				for _, v := range phis {
  2749  					id := v.Args[i].ID
  2750  					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
  2751  						update = true
  2752  						t.set(id, delta, v.Pos)
  2753  					}
  2754  				}
  2755  
  2756  				if !update {
  2757  					continue
  2758  				}
  2759  				// The live set has changed, update it.
  2760  				l := s.live[p.ID][:0]
  2761  				if cap(l) < t.size() {
  2762  					l = make([]liveInfo, 0, t.size())
  2763  				}
  2764  				for _, e := range t.contents() {
  2765  					l = append(l, liveInfo{e.key, e.val, e.pos})
  2766  				}
  2767  				s.live[p.ID] = l
  2768  				changed = true
  2769  			}
  2770  		}
  2771  
  2772  		if !changed {
  2773  			break
  2774  		}
  2775  	}
  2776  	if f.pass.debug > regDebug {
  2777  		fmt.Println("live values at end of each block")
  2778  		for _, b := range f.Blocks {
  2779  			fmt.Printf("  %s:", b)
  2780  			for _, x := range s.live[b.ID] {
  2781  				fmt.Printf(" v%d(%d)", x.ID, x.dist)
  2782  				for _, e := range s.desired[b.ID].entries {
  2783  					if e.ID != x.ID {
  2784  						continue
  2785  					}
  2786  					fmt.Printf("[")
  2787  					first := true
  2788  					for _, r := range e.regs {
  2789  						if r == noRegister {
  2790  							continue
  2791  						}
  2792  						if !first {
  2793  							fmt.Printf(",")
  2794  						}
  2795  						fmt.Print(&s.registers[r])
  2796  						first = false
  2797  					}
  2798  					fmt.Printf("]")
  2799  				}
  2800  			}
  2801  			if avoid := s.desired[b.ID].avoid; avoid != 0 {
  2802  				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
  2803  			}
  2804  			fmt.Println()
  2805  		}
  2806  	}
  2807  }
  2808  
  2809  // A desiredState represents desired register assignments.
  2810  type desiredState struct {
  2811  	// Desired assignments will be small, so we just use a list
  2812  	// of valueID+registers entries.
  2813  	entries []desiredStateEntry
  2814  	// Registers that other values want to be in.  This value will
  2815  	// contain at least the union of the regs fields of entries, but
  2816  	// may contain additional entries for values that were once in
  2817  	// this data structure but are no longer.
  2818  	avoid regMask
  2819  }
  2820  type desiredStateEntry struct {
  2821  	// (pre-regalloc) value
  2822  	ID ID
  2823  	// Registers it would like to be in, in priority order.
  2824  	// Unused slots are filled with noRegister.
  2825  	// For opcodes that return tuples, we track desired registers only
  2826  	// for the first element of the tuple.
  2827  	regs [4]register
  2828  }
  2829  
  2830  func (d *desiredState) clear() {
  2831  	d.entries = d.entries[:0]
  2832  	d.avoid = 0
  2833  }
  2834  
  2835  // get returns a list of desired registers for value vid.
  2836  func (d *desiredState) get(vid ID) [4]register {
  2837  	for _, e := range d.entries {
  2838  		if e.ID == vid {
  2839  			return e.regs
  2840  		}
  2841  	}
  2842  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  2843  }
  2844  
  2845  // add records that we'd like value vid to be in register r.
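         // For example (illustrative registers): with regs = [AX BX CX noRegister],
         // adding CX reorders the list to [CX AX BX noRegister], while adding a new
         // register DX yields [DX AX BX CX]; the lowest-priority entry is dropped
         // only when the list is already full.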
  2846  func (d *desiredState) add(vid ID, r register) {
  2847  	d.avoid |= regMask(1) << r
  2848  	for i := range d.entries {
  2849  		e := &d.entries[i]
  2850  		if e.ID != vid {
  2851  			continue
  2852  		}
  2853  		if e.regs[0] == r {
  2854  			// Already known and highest priority
  2855  			return
  2856  		}
  2857  		for j := 1; j < len(e.regs); j++ {
  2858  			if e.regs[j] == r {
  2859  				// Move from lower priority to top priority
  2860  				copy(e.regs[1:], e.regs[:j])
  2861  				e.regs[0] = r
  2862  				return
  2863  			}
  2864  		}
  2865  		copy(e.regs[1:], e.regs[:])
  2866  		e.regs[0] = r
  2867  		return
  2868  	}
  2869  	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
  2870  }
  2871  
  2872  func (d *desiredState) addList(vid ID, regs [4]register) {
  2873  	// regs is in priority order, so iterate in reverse order.
  2874  	for i := len(regs) - 1; i >= 0; i-- {
  2875  		r := regs[i]
  2876  		if r != noRegister {
  2877  			d.add(vid, r)
  2878  		}
  2879  	}
  2880  }
  2881  
  2882  // clobber erases any desired registers in the set m.
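         // For example, clobbering AX in an entry with regs = [AX BX noRegister
         // noRegister] compacts it to [BX noRegister noRegister noRegister]; an
         // entry left with no registers is removed entirely.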
  2883  func (d *desiredState) clobber(m regMask) {
  2884  	for i := 0; i < len(d.entries); {
  2885  		e := &d.entries[i]
  2886  		j := 0
  2887  		for _, r := range e.regs {
  2888  			if r != noRegister && m>>r&1 == 0 {
  2889  				e.regs[j] = r
  2890  				j++
  2891  			}
  2892  		}
  2893  		if j == 0 {
  2894  			// No more desired registers for this value.
  2895  			d.entries[i] = d.entries[len(d.entries)-1]
  2896  			d.entries = d.entries[:len(d.entries)-1]
  2897  			continue
  2898  		}
  2899  		for ; j < len(e.regs); j++ {
  2900  			e.regs[j] = noRegister
  2901  		}
  2902  		i++
  2903  	}
  2904  	d.avoid &^= m
  2905  }
  2906  
  2907  // copy copies a desired state from another desiredState x.
  2908  func (d *desiredState) copy(x *desiredState) {
  2909  	d.entries = append(d.entries[:0], x.entries...)
  2910  	d.avoid = x.avoid
  2911  }
  2912  
  2913  // remove removes the desired registers for vid and returns them.
  2914  func (d *desiredState) remove(vid ID) [4]register {
  2915  	for i := range d.entries {
  2916  		if d.entries[i].ID == vid {
  2917  			regs := d.entries[i].regs
  2918  			d.entries[i] = d.entries[len(d.entries)-1]
  2919  			d.entries = d.entries[:len(d.entries)-1]
  2920  			return regs
  2921  		}
  2922  	}
  2923  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  2924  }
  2925  
  2926  // merge merges another desired state x into d.
  2927  func (d *desiredState) merge(x *desiredState) {
  2928  	d.avoid |= x.avoid
  2929  	// There should only be a few desired registers, so
  2930  	// linear insert is ok.
  2931  	for _, e := range x.entries {
  2932  		d.addList(e.ID, e.regs)
  2933  	}
  2934  }
  2935  
  2936  func min32(x, y int32) int32 {
  2937  	if x < y {
  2938  		return x
  2939  	}
  2940  	return y
  2941  }
  2942  func max32(x, y int32) int32 {
  2943  	if x > y {
  2944  		return x
  2945  	}
  2946  	return y
  2947  }
  2948  
