PPC64Ops.go

Documentation: cmd/compile/internal/ssa/_gen

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import "strings"
     8  
     9  // Notes:
    10  //  - Less-than-64-bit integer types live in the low portion of registers.
    11  //    The upper portion is junk.
    12  //  - Boolean types are zero or 1; stored in a byte, with upper bytes of the register containing junk.
    13  //  - *const instructions may use a constant larger than the instruction can encode.
    14  //    In this case the assembler expands to multiple instructions and uses tmp
    15  //    register (R31).
    16  
    17  var regNamesPPC64 = []string{
    18  	"R0", // REGZERO, not used, but simplifies counting in regalloc
    19  	"SP", // REGSP
    20  	"SB", // REGSB
    21  	"R3",
    22  	"R4",
    23  	"R5",
    24  	"R6",
    25  	"R7",
    26  	"R8",
    27  	"R9",
    28  	"R10",
    29  	"R11", // REGCTXT for closures
    30  	"R12",
    31  	"R13", // REGTLS
    32  	"R14",
    33  	"R15",
    34  	"R16",
    35  	"R17",
    36  	"R18",
    37  	"R19",
    38  	"R20",
    39  	"R21",
    40  	"R22",
    41  	"R23",
    42  	"R24",
    43  	"R25",
    44  	"R26",
    45  	"R27",
    46  	"R28",
    47  	"R29",
    48  	"g",   // REGG.  Using name "g" and setting Config.hasGReg makes it "just happen".
    49  	"R31", // REGTMP
    50  
    51  	"F0",
    52  	"F1",
    53  	"F2",
    54  	"F3",
    55  	"F4",
    56  	"F5",
    57  	"F6",
    58  	"F7",
    59  	"F8",
    60  	"F9",
    61  	"F10",
    62  	"F11",
    63  	"F12",
    64  	"F13",
    65  	"F14",
    66  	"F15",
    67  	"F16",
    68  	"F17",
    69  	"F18",
    70  	"F19",
    71  	"F20",
    72  	"F21",
    73  	"F22",
    74  	"F23",
    75  	"F24",
    76  	"F25",
    77  	"F26",
    78  	"F27",
    79  	"F28",
    80  	"F29",
    81  	"F30",
    82  	// "F31", the allocator is limited to 64 entries. We sacrifice this FPR to support XER.
    83  
    84  	"XER",
    85  
    86  	// If you add registers, update asyncPreempt in runtime.
    87  
    88  	// "CR0",
    89  	// "CR1",
    90  	// "CR2",
    91  	// "CR3",
    92  	// "CR4",
    93  	// "CR5",
    94  	// "CR6",
    95  	// "CR7",
    96  
    97  	// "CR",
    98  	// "LR",
    99  	// "CTR",
   100  }
   101  
   102  func init() {
   103  	// Make map from reg names to reg integers.
   104  	if len(regNamesPPC64) > 64 {
   105  		panic("too many registers")
   106  	}
   107  	num := map[string]int{}
   108  	for i, name := range regNamesPPC64 {
   109  		num[name] = i
   110  	}
   111  	buildReg := func(s string) regMask {
   112  		m := regMask(0)
   113  		for _, r := range strings.Split(s, " ") {
   114  			if n, ok := num[r]; ok {
   115  				m |= regMask(1) << uint(n)
   116  				continue
   117  			}
   118  			panic("register " + r + " not found")
   119  		}
   120  		return m
   121  	}
   122  
   123  	var (
   124  		gp  = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29")
   125  		fp  = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30")
   126  		sp  = buildReg("SP")
   127  		sb  = buildReg("SB")
   128  		gr  = buildReg("g")
   129  		xer = buildReg("XER")
   130  		// cr  = buildReg("CR")
   131  		// ctr = buildReg("CTR")
   132  		// lr  = buildReg("LR")
   133  		tmp     = buildReg("R31")
   134  		ctxt    = buildReg("R11")
   135  		callptr = buildReg("R12")
   136  		// tls = buildReg("R13")
   137  		gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
   138  		gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   139  		xergp       = regInfo{inputs: []regMask{xer}, outputs: []regMask{gp}, clobbers: xer}
   140  		gp11cxer    = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer}
   141  		gp11xer     = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp, xer}}
   142  		gp1xer1xer  = regInfo{inputs: []regMask{gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer}
   143  		gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   144  		gp21a0      = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}}
   145  		gp21cxer    = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer}
   146  		gp21xer     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, xer}, clobbers: xer}
   147  		gp2xer1xer  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer}
   148  		gp31        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   149  		gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
   150  		gp2cr       = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   151  		crgp        = regInfo{inputs: nil, outputs: []regMask{gp}}
   152  		crgp11      = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
   153  		crgp21      = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
   154  		gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   155  		gploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   156  		prefreg     = regInfo{inputs: []regMask{gp | sp | sb}}
   157  		gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   158  		gpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}}
   159  		gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
   160  		gpxchg      = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   161  		gpcas       = regInfo{inputs: []regMask{gp | sp | sb, gp, gp}, outputs: []regMask{gp}}
   162  		fp01        = regInfo{inputs: nil, outputs: []regMask{fp}}
   163  		fp11        = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
   164  		fpgp        = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
   165  		gpfp        = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
   166  		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
   167  		fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
   168  		fp2cr       = regInfo{inputs: []regMask{fp, fp}}
   169  		fpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}}
   170  		fploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{fp}}
   171  		fpstore     = regInfo{inputs: []regMask{gp | sp | sb, fp}}
   172  		fpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, fp}}
   173  		callerSave  = regMask(gp | fp | gr | xer)
   174  		r3          = buildReg("R3")
   175  		r4          = buildReg("R4")
   176  		r5          = buildReg("R5")
   177  		r6          = buildReg("R6")
   178  	)
   179  	ops := []opData{
   180  		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},                              // arg0 + arg1
   181  		{name: "ADDCC", argLength: 2, reg: gp21, asm: "ADDCC", commutative: true, typ: "(Int,Flags)"},      // arg0 + arg1
   182  		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"},                              // arg0 + auxInt
   183  		{name: "ADDCCconst", argLength: 1, reg: gp11cxer, asm: "ADDCCC", aux: "Int64", typ: "(Int,Flags)"}, // arg0 + auxInt sets CC, clobbers XER
   184  		{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},                            // arg0+arg1
   185  		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true},                          // arg0+arg1
   186  		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                                                 // arg0-arg1
   187  		{name: "SUBCC", argLength: 2, reg: gp21, asm: "SUBCC", typ: "(Int,Flags)"},                         // arg0-arg1 sets CC
   188  		{name: "SUBFCconst", argLength: 1, reg: gp11cxer, asm: "SUBC", aux: "Int64"},                       // auxInt - arg0 (carry is ignored)
   189  		{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                                               // arg0-arg1
   190  		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                                             // arg0-arg1
   191  
   192  		// Note, the FPU works with float64 in register.
   193  		{name: "XSMINJDP", argLength: 2, reg: fp21, asm: "XSMINJDP"}, // fmin(arg0,arg1)
   194  		{name: "XSMAXJDP", argLength: 2, reg: fp21, asm: "XSMAXJDP"}, // fmax(arg0,arg1)
   195  
   196  		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
   197  		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
   198  		{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
   199  		{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
   200  		{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"},                  // (arg0*arg1)+arg2 (signed 64-bit)
   201  
   202  		{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},   // (arg0 * arg1) >> 64, signed
   203  		{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},   // (arg0 * arg1) >> 32, signed
   204  		{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
   205  		{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
   206  
   207  		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
   208  		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
   209  
   210  		{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD"},   // arg0*arg1 + arg2
   211  		{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // arg0*arg1 + arg2
   212  		{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB"},   // arg0*arg1 - arg2
   213  		{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // arg0*arg1 - arg2
   214  
   215  		{name: "SRAD", argLength: 2, reg: gp21cxer, asm: "SRAD"}, // signed arg0 >> (arg1&127), 64 bit width (note: 127, not 63!)
   216  		{name: "SRAW", argLength: 2, reg: gp21cxer, asm: "SRAW"}, // signed arg0 >> (arg1&63), 32 bit width
   217  		{name: "SRD", argLength: 2, reg: gp21, asm: "SRD"},       // unsigned arg0 >> (arg1&127), 64 bit width
   218  		{name: "SRW", argLength: 2, reg: gp21, asm: "SRW"},       // unsigned arg0 >> (arg1&63), 32 bit width
   219  		{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"},       // arg0 << (arg1&127), 64 bit width
   220  		{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"},       // arg0 << (arg1&63), 32 bit width
   221  
   222  		{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"},   // arg0 rotate left by arg1 mod 64
   223  		{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
   224  		// The following are ops to implement the extended mnemonics for shifts as described in section C.8 of the ISA.
   225  		// The constant shift values are packed into the aux int32.
   226  		{name: "CLRLSLWI", argLength: 1, reg: gp11, asm: "CLRLSLWI", aux: "Int32"}, //
   227  		{name: "CLRLSLDI", argLength: 1, reg: gp11, asm: "CLRLSLDI", aux: "Int32"}, //
   228  
   229  		// Operations which consume or generate the CA (xer)
   230  		{name: "ADDC", argLength: 2, reg: gp21xer, asm: "ADDC", commutative: true, typ: "(UInt64, UInt64)"},    // arg0 + arg1 -> out, CA
   231  		{name: "SUBC", argLength: 2, reg: gp21xer, asm: "SUBC", typ: "(UInt64, UInt64)"},                       // arg0 - arg1 -> out, CA
   232  		{name: "ADDCconst", argLength: 1, reg: gp11xer, asm: "ADDC", typ: "(UInt64, UInt64)", aux: "Int64"},    // arg0 + imm16 -> out, CA
   233  		{name: "SUBCconst", argLength: 1, reg: gp11xer, asm: "SUBC", typ: "(UInt64, UInt64)", aux: "Int64"},    // imm16 - arg0 -> out, CA
   234  		{name: "ADDE", argLength: 3, reg: gp2xer1xer, asm: "ADDE", typ: "(UInt64, UInt64)", commutative: true}, // arg0 + arg1 + CA (arg2) -> out, CA
   235  		{name: "ADDZE", argLength: 2, reg: gp1xer1xer, asm: "ADDZE", typ: "(UInt64, UInt64)"},                  // arg0 + CA (arg1) -> out, CA
   236  		{name: "SUBE", argLength: 3, reg: gp2xer1xer, asm: "SUBE", typ: "(UInt64, UInt64)"},                    // arg0 - arg1 - CA (arg2) -> out, CA
   237  		{name: "ADDZEzero", argLength: 1, reg: xergp, asm: "ADDZE", typ: "UInt64"},                             // CA (arg0) + $0 -> out
   238  		{name: "SUBZEzero", argLength: 1, reg: xergp, asm: "SUBZE", typ: "UInt64"},                             // $0 - CA (arg0) -> out
   239  
   240  		{name: "SRADconst", argLength: 1, reg: gp11cxer, asm: "SRAD", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
   241  		{name: "SRAWconst", argLength: 1, reg: gp11cxer, asm: "SRAW", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
   242  		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"},       // unsigned arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
   243  		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"},       // unsigned arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
   244  		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},       // arg0 << auxInt, 0 <= auxInt < 64, 64 bit width
   245  		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},       // arg0 << auxInt, 0 <= auxInt < 32, 32 bit width
   246  
   247  		{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"},   // arg0 rotate left by auxInt bits
   248  		{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
   249  		{name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"},
   250  
   251  		{name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"},                           // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux
   252  		{name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"},                            // Rotate and mask by "rlwnm". encodePPC64RotateMask describes aux
   253  		{name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true},      // "rlwimi" similar aux encoding as above
   254  		{name: "RLDICL", argLength: 1, reg: gp11, asm: "RLDICL", aux: "Int64"},                          // Auxint is encoded similarly to RLWINM, but only MB and SH are valid. ME is always 63.
   255  		{name: "RLDICLCC", argLength: 1, reg: gp11, asm: "RLDICLCC", aux: "Int64", typ: "(Int, Flags)"}, // Auxint is encoded similarly to RLWINM, but only MB and SH are valid. ME is always 63. Sets CC.
   256  		{name: "RLDICR", argLength: 1, reg: gp11, asm: "RLDICR", aux: "Int64"},                          // Likewise, but only ME and SH are valid. MB is always 0.
   257  
   258  		{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD"},                          // count leading zeros
   259  		{name: "CNTLZDCC", argLength: 1, reg: gp11, asm: "CNTLZDCC", typ: "(Int, Flags)"}, // count leading zeros, sets CC
   260  		{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW"},                          // count leading zeros (32 bit)
   261  
   262  		{name: "CNTTZD", argLength: 1, reg: gp11, asm: "CNTTZD"}, // count trailing zeros
   263  		{name: "CNTTZW", argLength: 1, reg: gp11, asm: "CNTTZW"}, // count trailing zeros (32 bit)
   264  
   265  		{name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
   266  		{name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
   267  		{name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte
   268  
   269  		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
   270  		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
   271  
   272  		{name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"},   // arg0/arg1 (signed 64-bit)
   273  		{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},   // arg0/arg1 (signed 32-bit)
   274  		{name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
   275  		{name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
   276  
   277  		{name: "MODUD", argLength: 2, reg: gp21, asm: "MODUD", typ: "UInt64"}, // arg0 % arg1 (unsigned 64-bit)
   278  		{name: "MODSD", argLength: 2, reg: gp21, asm: "MODSD", typ: "Int64"},  // arg0 % arg1 (signed 64-bit)
   279  		{name: "MODUW", argLength: 2, reg: gp21, asm: "MODUW", typ: "UInt32"}, // arg0 % arg1 (unsigned 32-bit)
   280  		{name: "MODSW", argLength: 2, reg: gp21, asm: "MODSW", typ: "Int32"},  // arg0 % arg1 (signed 32-bit)
   281  		// MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
   282  
   283  		// Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
   284  		{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
   285  		{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
   286  		{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"},   // convert 64-bit integer to float
   287  		{name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float
   288  		{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"},     // round float to 32-bit value
   289  
   290  		// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
   291  		// Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the
   292  		// data instead and use FMOVDload and FMOVDstore instead (this will also dodge endianess issues).
   293  		// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
   294  		// the word-load instructions.  (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
   295  
   296  		{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"},   // move 64 bits of F register into G register
   297  		{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
   298  
   299  		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},                           // arg0&arg1
   300  		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                            // arg0&^arg1
   301  		{name: "ANDNCC", argLength: 2, reg: gp21, asm: "ANDNCC", typ: "(Int64,Flags)"},                  // arg0&^arg1 sets CC
   302  		{name: "ANDCC", argLength: 2, reg: gp21, asm: "ANDCC", commutative: true, typ: "(Int64,Flags)"}, // arg0&arg1 sets CC
   303  		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                             // arg0|arg1
   304  		{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                              // arg0|^arg1
   305  		{name: "ORCC", argLength: 2, reg: gp21, asm: "ORCC", commutative: true, typ: "(Int,Flags)"},     // arg0|arg1 sets CC
   306  		{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},                           // ^(arg0|arg1)
   307  		{name: "NORCC", argLength: 2, reg: gp21, asm: "NORCC", commutative: true, typ: "(Int,Flags)"},   // ^(arg0|arg1) sets CC
   308  		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true},             // arg0^arg1
   309  		{name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, typ: "(Int,Flags)"},   // arg0^arg1 sets CC
   310  		{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true},             // arg0^^arg1
   311  		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                              // -arg0 (integer)
   312  		{name: "NEGCC", argLength: 1, reg: gp11, asm: "NEGCC", typ: "(Int,Flags)"},                      // -arg0 (integer) sets CC
   313  		{name: "BRD", argLength: 1, reg: gp11, asm: "BRD"},                                              // reversebytes64(arg0)
   314  		{name: "BRW", argLength: 1, reg: gp11, asm: "BRW"},                                              // reversebytes32(arg0)
   315  		{name: "BRH", argLength: 1, reg: gp11, asm: "BRH"},                                              // reversebytes16(arg0)
   316  		{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                            // -arg0 (floating point)
   317  		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                                          // sqrt(arg0) (floating point)
   318  		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                                        // sqrt(arg0) (floating point, single precision)
   319  		{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                                          // floor(arg0), float64
   320  		{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                                           // ceil(arg0), float64
   321  		{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                                          // trunc(arg0), float64
   322  		{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"},                                          // round(arg0), float64
   323  		{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"},                                            // abs(arg0), float64
   324  		{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"},                                          // -abs(arg0), float64
   325  		{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"},                                        // copysign arg0 -> arg1, float64
   326  
   327  		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},                                                                                                 // arg0|aux
   328  		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"},                                                                                               // arg0^aux
   329  		{name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", typ: "(Int,Flags)"},           // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   330  		{name: "ANDconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, clobberFlags: true, asm: "ANDCC", aux: "Int64", typ: "Int"}, // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   331  
   332  		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"},   // sign extend int8 to int64
   333  		{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"}, // zero extend uint8 to uint64
   334  		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"},   // sign extend int16 to int64
   335  		{name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"}, // zero extend uint16 to uint64
   336  		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"},   // sign extend int32 to int64
   337  		{name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"}, // zero extend uint32 to uint64
   338  
   339  		// Load bytes in the endian order of the arch from arg0+aux+auxint into a 64 bit register.
   340  		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // load byte zero extend
   341  		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"},    // load 2 bytes sign extend
   342  		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend
   343  		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"},    // load 4 bytes sign extend
   344  		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend
   345  		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},    // load 8 bytes
   346  
   347  		// Load bytes in reverse endian order of the arch from arg0 into a 64 bit register, all zero extend.
   348  		// The generated instructions are indexed loads with no offset field in the instruction so the aux fields are not used.
   349  		// In these cases the index register field is set to 0 and the full address is in the base register.
   350  		{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", typ: "UInt64", faultOnNilArg0: true}, // load 8 bytes reverse order
   351  		{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", typ: "UInt32", faultOnNilArg0: true}, // load 4 bytes zero extend reverse order
   352  		{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", typ: "UInt16", faultOnNilArg0: true}, // load 2 bytes zero extend reverse order
   353  
   354  		// In these cases an index register is used in addition to a base register
   355  		// Loads from memory location arg[0] + arg[1].
   356  		{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", typ: "UInt8"},  // zero extend uint8 to uint64
   357  		{name: "MOVHloadidx", argLength: 3, reg: gploadidx, asm: "MOVH", typ: "Int16"},    // sign extend int16 to int64
   358  		{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", typ: "UInt16"}, // zero extend uint16 to uint64
   359  		{name: "MOVWloadidx", argLength: 3, reg: gploadidx, asm: "MOVW", typ: "Int32"},    // sign extend int32 to int64
   360  		{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", typ: "UInt32"}, // zero extend uint32 to uint64
   361  		{name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", typ: "Int64"},
   362  		{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", typ: "Int16"}, // sign extend int16 to int64
   363  		{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", typ: "Int32"}, // sign extend int32 to int64
   364  		{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", typ: "Int64"},
   365  		{name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", typ: "Float64"},
   366  		{name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", typ: "Float32"},
   367  
   368  		// Prefetch instruction
   369  		// Do prefetch of address generated with arg0 and arg1 with option aux. arg0=addr,arg1=memory, aux=option.
   370  		{name: "DCBT", argLength: 2, aux: "Int64", reg: prefreg, asm: "DCBT", hasSideEffects: true},
   371  
   372  		// Store bytes in the reverse endian order of the arch into arg0.
   373  		// These are indexed stores with no offset field in the instruction so the auxint fields are not used.
   374  		{name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes reverse order
   375  		{name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes reverse order
   376  		{name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes reverse order
   377  
   378  		// Floating point loads from arg0+aux+auxint
   379  		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load double float
   380  		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load single float
   381  
   382  		// Store bytes in the endian order of the arch into arg0+aux+auxint
   383  		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte
   384  		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes
   385  		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes
   386  		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes
   387  
   388  		// Store floating point value into arg0+aux+auxint
   389  		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store double flot
   390  		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store single float
   391  
   392  		// Stores using index and base registers
   393  		// Stores to arg[0] + arg[1]
   394  		{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", typ: "Mem"},     // store bye
   395  		{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", typ: "Mem"},     // store half word
   396  		{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", typ: "Mem"},     // store word
   397  		{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", typ: "Mem"},     // store double word
   398  		{name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", typ: "Mem"},   // store double float
   399  		{name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", typ: "Mem"},   // store single float
   400  		{name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", typ: "Mem"}, // store half word reversed byte using index reg
   401  		{name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", typ: "Mem"}, // store word reversed byte using index reg
   402  		{name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", typ: "Mem"}, // store double word reversed byte using index reg
   403  
   404  		// The following ops store 0 into arg0+aux+auxint arg1=mem
   405  		{name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 1 byte
   406  		{name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 2 bytes
   407  		{name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 4 bytes
   408  		{name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 8 bytes
   409  
   410  		{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb | gp}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB/GP
   411  
   412  		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "Int64", rematerializeable: true}, //
   413  		{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true},           //
   414  		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true},           //
   415  		{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
   416  
   417  		{name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"},     // arg0 compare to arg1
   418  		{name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
   419  		{name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"},   // arg0 compare to arg1
   420  		{name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
   421  		{name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"},
   422  		{name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"},
   423  		{name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"},
   424  		{name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"},
   425  
   426  		// ISEL  arg2 ? arg0 : arg1
   427  		// ISELZ arg1 ? arg0 : $0
   428  		// auxInt values 0=LT 1=GT 2=EQ 3=SO (summary overflow/unordered) 4=GE 5=LE 6=NE 7=NSO (not summary overflow/not unordered)
   429  		// Note, auxInt^4 inverts the comparison condition. For example, LT^4 becomes GE, and "ISEL [a] x y z" is equivalent to ISEL [a^4] y x z".
   430  		{name: "ISEL", argLength: 3, reg: crgp21, asm: "ISEL", aux: "Int32", typ: "Int32"},
   431  		{name: "ISELZ", argLength: 2, reg: crgp11, asm: "ISEL", aux: "Int32"},
   432  
   433  		// SETBC auxInt values 0=LT 1=GT 2=EQ     (CRbit=1)? 1 : 0
   434  		{name: "SETBC", argLength: 1, reg: crgp, asm: "SETBC", aux: "Int32", typ: "Int32"},
   435  		// SETBCR auxInt values 0=LT 1=GT 2=EQ     (CRbit=1)? 0 : 1
   436  		{name: "SETBCR", argLength: 1, reg: crgp, asm: "SETBCR", aux: "Int32", typ: "Int32"},
   437  
   438  		// pseudo-ops
   439  		{name: "Equal", argLength: 1, reg: crgp},         // bool, true flags encode x==y false otherwise.
   440  		{name: "NotEqual", argLength: 1, reg: crgp},      // bool, true flags encode x!=y false otherwise.
   441  		{name: "LessThan", argLength: 1, reg: crgp},      // bool, true flags encode  x<y false otherwise.
   442  		{name: "FLessThan", argLength: 1, reg: crgp},     // bool, true flags encode  x<y false otherwise.
   443  		{name: "LessEqual", argLength: 1, reg: crgp},     // bool, true flags encode  x<=y false otherwise.
   444  		{name: "FLessEqual", argLength: 1, reg: crgp},    // bool, true flags encode  x<=y false otherwise; PPC <= === !> which is wrong for NaN
   445  		{name: "GreaterThan", argLength: 1, reg: crgp},   // bool, true flags encode  x>y false otherwise.
   446  		{name: "FGreaterThan", argLength: 1, reg: crgp},  // bool, true flags encode  x>y false otherwise.
   447  		{name: "GreaterEqual", argLength: 1, reg: crgp},  // bool, true flags encode  x>=y false otherwise.
   448  		{name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode  x>=y false otherwise.; PPC >= === !< which is wrong for NaN
   449  
   450  		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
   451  		// and sorts it to the very beginning of the block to prevent other
   452  		// use of the closure pointer.
   453  		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}, zeroWidth: true},
   454  
   455  		// LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem.
   456  		{name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true},
   457  
   458  		// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
   459  		// I.e., if f calls g "calls" getcallerpc,
   460  		// the result should be the PC within f that g will return to.
   461  		// See runtime/stubs.go for a more detailed discussion.
   462  		{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
   463  
   464  		//arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
   465  		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
   466  		// Round ops to block fused-multiply-add extraction.
   467  		{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   468  		{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   469  
   470  		{name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                                       // call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   471  		{name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true},                         // tail call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   472  		{name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{callptr, ctxt, 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
   473  		{name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{callptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},            // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
   474  
   475  		// large or unaligned zeroing
   476  		// arg0 = address of memory to zero (in R3, changed as side effect)
   477  		// returns mem
   478  		//
   479  		// a loop is generated when there is more than one iteration
   480  		// needed to clear 4 doublewords
   481  		//
   482  		//	XXLXOR	VS32,VS32,VS32
   483  		// 	MOVD	$len/32,R31
   484  		//	MOVD	R31,CTR
   485  		//	MOVD	$16,R31
   486  		//	loop:
   487  		//	STXVD2X VS32,(R0)(R3)
   488  		//	STXVD2X	VS32,(R31)(R3)
   489  		//	ADD	R3,32
   490  		//	BC	loop
   491  
   492  		// remaining doubleword clears generated as needed
   493  		//	MOVD	R0,(R3)
   494  		//	MOVD	R0,8(R3)
   495  		//	MOVD	R0,16(R3)
   496  		//	MOVD	R0,24(R3)
   497  
   498  		// one or more of these to clear remainder < 8 bytes
   499  		//	MOVW	R0,n1(R3)
   500  		//	MOVH	R0,n2(R3)
   501  		//	MOVB	R0,n3(R3)
   502  		{
   503  			name:      "LoweredZero",
   504  			aux:       "Int64",
   505  			argLength: 2,
   506  			reg: regInfo{
   507  				inputs:   []regMask{buildReg("R20")},
   508  				clobbers: buildReg("R20"),
   509  			},
   510  			clobberFlags:   true,
   511  			typ:            "Mem",
   512  			faultOnNilArg0: true,
   513  			unsafePoint:    true,
   514  		},
   515  		{
   516  			name:      "LoweredZeroShort",
   517  			aux:       "Int64",
   518  			argLength: 2,
   519  			reg: regInfo{
   520  				inputs: []regMask{gp}},
   521  			typ:            "Mem",
   522  			faultOnNilArg0: true,
   523  			unsafePoint:    true,
   524  		},
   525  		{
   526  			name:      "LoweredQuadZeroShort",
   527  			aux:       "Int64",
   528  			argLength: 2,
   529  			reg: regInfo{
   530  				inputs: []regMask{gp},
   531  			},
   532  			typ:            "Mem",
   533  			faultOnNilArg0: true,
   534  			unsafePoint:    true,
   535  		},
   536  		{
   537  			name:      "LoweredQuadZero",
   538  			aux:       "Int64",
   539  			argLength: 2,
   540  			reg: regInfo{
   541  				inputs:   []regMask{buildReg("R20")},
   542  				clobbers: buildReg("R20"),
   543  			},
   544  			clobberFlags:   true,
   545  			typ:            "Mem",
   546  			faultOnNilArg0: true,
   547  			unsafePoint:    true,
   548  		},
   549  
   550  		// R31 is temp register
   551  		// Loop code:
   552  		//	MOVD len/32,R31		set up loop ctr
   553  		//	MOVD R31,CTR
   554  		//	MOVD $16,R31		index register
   555  		// loop:
   556  		//	LXVD2X (R0)(R4),VS32
   557  		//	LXVD2X (R31)(R4),VS33
   558  		//	ADD  R4,$32          increment src
   559  		//	STXVD2X VS32,(R0)(R3)
   560  		//	STXVD2X VS33,(R31)(R3)
   561  		//	ADD  R3,$32          increment dst
   562  		//	BC 16,0,loop         branch ctr
   563  		// For this purpose, VS32 and VS33 are treated as
   564  		// scratch registers. Since regalloc does not
   565  		// track vector registers, even if it could be marked
   566  		// as clobbered it would have no effect.
   567  		// TODO: If vector registers are managed by regalloc
   568  		// mark these as clobbered.
   569  		//
   570  		// Bytes not moved by this loop are moved
   571  		// with a combination of the following instructions,
   572  		// starting with the largest sizes and generating as
   573  		// many as needed, using the appropriate offset value.
   574  		//	MOVD  n(R4),R14
   575  		//	MOVD  R14,n(R3)
   576  		//	MOVW  n1(R4),R14
   577  		//	MOVW  R14,n1(R3)
   578  		//	MOVH  n2(R4),R14
   579  		//	MOVH  R14,n2(R3)
   580  		//	MOVB  n3(R4),R14
   581  		//	MOVB  R14,n3(R3)
   582  
   583  		{
   584  			name:      "LoweredMove",
   585  			aux:       "Int64",
   586  			argLength: 3,
   587  			reg: regInfo{
   588  				inputs:   []regMask{buildReg("R20"), buildReg("R21")},
   589  				clobbers: buildReg("R20 R21"),
   590  			},
   591  			clobberFlags:   true,
   592  			typ:            "Mem",
   593  			faultOnNilArg0: true,
   594  			faultOnNilArg1: true,
   595  			unsafePoint:    true,
   596  		},
   597  		{
   598  			name:      "LoweredMoveShort",
   599  			aux:       "Int64",
   600  			argLength: 3,
   601  			reg: regInfo{
   602  				inputs: []regMask{gp, gp},
   603  			},
   604  			typ:            "Mem",
   605  			faultOnNilArg0: true,
   606  			faultOnNilArg1: true,
   607  			unsafePoint:    true,
   608  		},
   609  
   610  		// The following is similar to the LoweredMove, but uses
   611  		// LXV instead of LXVD2X, which does not require an index
   612  		// register and will do 4 in a loop instead of only.
   613  		{
   614  			name:      "LoweredQuadMove",
   615  			aux:       "Int64",
   616  			argLength: 3,
   617  			reg: regInfo{
   618  				inputs:   []regMask{buildReg("R20"), buildReg("R21")},
   619  				clobbers: buildReg("R20 R21"),
   620  			},
   621  			clobberFlags:   true,
   622  			typ:            "Mem",
   623  			faultOnNilArg0: true,
   624  			faultOnNilArg1: true,
   625  			unsafePoint:    true,
   626  		},
   627  
   628  		{
   629  			name:      "LoweredQuadMoveShort",
   630  			aux:       "Int64",
   631  			argLength: 3,
   632  			reg: regInfo{
   633  				inputs: []regMask{gp, gp},
   634  			},
   635  			typ:            "Mem",
   636  			faultOnNilArg0: true,
   637  			faultOnNilArg1: true,
   638  			unsafePoint:    true,
   639  		},
   640  
   641  		{name: "LoweredAtomicStore8", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   642  		{name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   643  		{name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   644  
   645  		{name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, typ: "UInt8", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   646  		{name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   647  		{name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   648  		{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   649  
   650  		// atomic add32, 64
   651  		// LWSYNC
   652  		// LDAR         (Rarg0), Rout
   653  		// ADD		Rarg1, Rout
   654  		// STDCCC       Rout, (Rarg0)
   655  		// BNE          -3(PC)
   656  		// return new sum
   657  		{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   658  		{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   659  
   660  		// atomic exchange32, 64
   661  		// LWSYNC
   662  		// LDAR         (Rarg0), Rout
   663  		// STDCCC       Rarg1, (Rarg0)
   664  		// BNE          -2(PC)
   665  		// ISYNC
   666  		// return old val
   667  		{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   668  		{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   669  
   670  		// atomic compare and swap.
   671  		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
   672  		// if *arg0 == arg1 {
   673  		//   *arg0 = arg2
   674  		//   return (true, memory)
   675  		// } else {
   676  		//   return (false, memory)
   677  		// }
   678  		// SYNC
   679  		// LDAR		(Rarg0), Rtmp
   680  		// CMP		Rarg1, Rtmp
   681  		// BNE		3(PC)
   682  		// STDCCC	Rarg2, (Rarg0)
   683  		// BNE		-4(PC)
   684  		// CBNZ         Rtmp, -4(PC)
   685  		// CSET         EQ, Rout
   686  		{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   687  		{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   688  
   689  		// atomic 8/32 and/or.
   690  		// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
   691  		// LBAR/LWAT	(Rarg0), Rtmp
   692  		// AND/OR	Rarg1, Rtmp
   693  		// STBCCC/STWCCC Rtmp, (Rarg0), Rtmp
   694  		// BNE		Rtmp, -3(PC)
   695  		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   696  		{name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   697  		{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   698  		{name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   699  
   700  		// LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed
   701  		// It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and R20 and R21,
   702  		// but may clobber anything else, including R31 (REGTMP).
   703  		// Returns a pointer to a write barrier buffer in R29.
   704  		{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17 R20 R21 g")) | buildReg("R31"), outputs: []regMask{buildReg("R29")}}, clobberFlags: true, aux: "Int64"},
   705  
   706  		{name: "LoweredPubBarrier", argLength: 1, asm: "LWSYNC", hasSideEffects: true}, // Do data barrier. arg0=memory
   707  		// There are three of these functions so that they can have three different register inputs.
   708  		// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
   709  		// default registers to match so we don't need to copy registers around unnecessarily.
   710  		{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r6}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   711  		{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r5}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   712  		{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   713  
   714  		// (InvertFlags (CMP a b)) == (CMP b a)
   715  		// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
   716  		// then we do (LessThan (InvertFlags (CMP b a))) instead.
   717  		// Rewrites will convert this to (GreaterThan (CMP b a)).
   718  		// InvertFlags is a pseudo-op which can't appear in assembly output.
   719  		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
   720  
   721  		// Constant flag values. For any comparison, there are 3 possible
   722  		// outcomes: either the three from the signed total order (<,==,>)
   723  		// or the three from the unsigned total order, depending on which
   724  		// comparison operation was used (CMP or CMPU -- PPC is different from
   725  		// the other architectures, which have a single comparison producing
   726  		// both signed and unsigned comparison results.)
   727  
   728  		// These ops are for temporary use by rewrite rules. They
   729  		// cannot appear in the generated assembly.
   730  		{name: "FlagEQ"}, // equal
   731  		{name: "FlagLT"}, // signed < or unsigned <
   732  		{name: "FlagGT"}, // signed > or unsigned >
   733  	}
   734  
   735  	blocks := []blockData{
   736  		{name: "EQ", controls: 1},
   737  		{name: "NE", controls: 1},
   738  		{name: "LT", controls: 1},
   739  		{name: "LE", controls: 1},
   740  		{name: "GT", controls: 1},
   741  		{name: "GE", controls: 1},
   742  		{name: "FLT", controls: 1},
   743  		{name: "FLE", controls: 1},
   744  		{name: "FGT", controls: 1},
   745  		{name: "FGE", controls: 1},
   746  	}
   747  
   748  	archs = append(archs, arch{
   749  		name:               "PPC64",
   750  		pkg:                "cmd/internal/obj/ppc64",
   751  		genfile:            "../../ppc64/ssa.go",
   752  		ops:                ops,
   753  		blocks:             blocks,
   754  		regnames:           regNamesPPC64,
   755  		ParamIntRegNames:   "R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17",
   756  		ParamFloatRegNames: "F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12",
   757  		gpregmask:          gp,
   758  		fpregmask:          fp,
   759  		specialregmask:     xer,
   760  		framepointerreg:    -1,
   761  		linkreg:            -1, // not used
   762  	})
   763  }
   764
View as plain text