...

Source file src/cmd/internal/obj/x86/asm6.go

Documentation: cmd/internal/obj/x86

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"internal/buildcfg"
    40  	"log"
    41  	"strings"
    42  )
    43  
    44  var (
    45  	plan9privates *obj.LSym // NOTE(review): presumably a symbol needed only for Plan 9 targets, going by the name — confirm at its users.
    46  )
    47  
    48  // Instruction layout.
    49  
    50  // Loop alignment constants:
    51  // want to align loop entry to loopAlign-byte boundary,
    52  // and willing to insert at most maxLoopPad bytes of NOP to do so.
    53  // We define a loop entry as the target of a backward jump.
    54  //
    55  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    56  // and it aligns all jump targets, not just backward jump targets.
    57  //
    58  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    59  // is very slight but negative, so the alignment is disabled by
    60  // setting maxLoopPad = 0. The code is here for reference and
    61  // for future experiments.
    62  const (
    63  	loopAlign  = 16 // alignment boundary, in bytes, for loop entries
    64  	maxLoopPad = 0  // upper bound on NOP padding bytes; 0 disables loop alignment
    65  )
    66  
    67  // Bit flags that are used to express jump target properties.
    68  // Each flag is a distinct bit (1 << iota gives 1, 2, 4).
    69  const (
    70  	// branchBackwards marks targets that are located behind.
    71  	// Used to express jumps to loop headers.
    72  	branchBackwards = (1 << iota)
    73  	// branchShort marks branches whose target is close,
    74  	// with the offset in the -128..127 range.
    75  	branchShort
    76  	// branchLoopHead marks loop entry.
    77  	// Used to insert padding for misaligned loops.
    78  	branchLoopHead
    79  )
    79  
    80  // opBytes holds optab encoding bytes.
    81  // Each ytab line reserves a fixed number of bytes in this array.
    82  //
    83  // The size should be the minimal number of bytes that
    84  // is enough to hold the biggest optab op lines.
    85  type opBytes [31]uint8
    86  
    87  type Optab struct { // One optab line: how to encode a single instruction.
    88  	as     obj.As  // instruction this line describes; optab is searched by p.As
    89  	ytab   []ytab  // operand-form lines, scanned for one whose Ytypes match the operands
    90  	prefix uint8   // P* constant that controls which prefix bytes are emitted
    91  	op     opBytes // encoding ("z") bytes; the z position advances by each skipped ytab line's zoffset
    92  }
    93  
    94  type movtab struct { // NOTE(review): no user of movtab is visible near these declarations; the field notes are assumptions to confirm.
    95  	as   obj.As   // instruction (same type as Optab.as)
    96  	ft   uint8    // presumably the Ytype of the from operand — confirm
    97  	f3t  uint8    // presumably the Ytype of a third operand — confirm
    98  	tt   uint8    // presumably the Ytype of the to operand — confirm
    99  	code uint8    // meaning not visible here — confirm at the users
   100  	op   [4]uint8 // four bytes of op data; interpretation to be confirmed
   101  }
   102  
   103  const ( // Ytypes: operand classes; oclass computes one per operand and ytab argLists are matched against them.
   104  	Yxxx = iota
   105  	Ynone
   106  	Yi0  // $0
   107  	Yi1  // $1
   108  	Yu2  // $x, x fits in uint2
   109  	Yi8  // $x, x fits in int8
   110  	Yu8  // $x, x fits in uint8
   111  	Yu7  // $x, x in 0..127 (fits in both int8 and uint8)
   112  	Ys32 // signed 32; ycover lets Yi0, Yi1, and Yi8 count as Ys32
   113  	Yi32
   114  	Yi64
   115  	Yiauto
   116  	Yal
   117  	Ycl
   118  	Yax
   119  	Ycx
   120  	Yrb
   121  	Yrl
   122  	Yrl32 // Yrl on 32-bit system
   123  	Yrf
   124  	Yf0
   125  	Yrx
   126  	Ymb
   127  	Yml
   128  	Ym
   129  	Ybr
   130  	Ycs
   131  	Yss
   132  	Yds
   133  	Yes
   134  	Yfs
   135  	Ygs
   136  	Ygdtr
   137  	Yidtr
   138  	Yldtr
   139  	Ymsw
   140  	Ytask
   141  	Ycr0
   142  	Ycr1
   143  	Ycr2
   144  	Ycr3
   145  	Ycr4
   146  	Ycr5
   147  	Ycr6
   148  	Ycr7
   149  	Ycr8
   150  	Ydr0
   151  	Ydr1
   152  	Ydr2
   153  	Ydr3
   154  	Ydr4
   155  	Ydr5
   156  	Ydr6
   157  	Ydr7
   158  	Ytr0
   159  	Ytr1
   160  	Ytr2
   161  	Ytr3
   162  	Ytr4
   163  	Ytr5
   164  	Ytr6
   165  	Ytr7
   166  	Ymr
   167  	Ymm
   168  	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   169  	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   170  	Yxr           // X0..X15
   171  	YxrEvex       // X0..X31
   172  	Yxm
   173  	YxmEvex       // YxrEvex+Ym
   174  	Yxvm          // VSIB vector array; vm32x/vm64x
   175  	YxvmEvex      // Yxvm which permits High-16 X register as index.
   176  	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   177  	Yyr           // Y0..Y15
   178  	YyrEvex       // Y0..Y31
   179  	Yym
   180  	YymEvex   // YyrEvex+Ym
   181  	Yyvm      // VSIB vector array; vm32y/vm64y
   182  	YyvmEvex  // Yyvm which permits High-16 Y register as index.
   183  	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
   184  	Yzr       // Z0..Z31
   185  	Yzm       // Yzr+Ym
   186  	Yzvm      // VSIB vector array; vm32z/vm64z
   187  	Yk0       // K0
   188  	Yknot0    // K1..K7; write mask
   189  	Yk        // K0..K7; used for KOP
   190  	Ykm       // Yk+Ym; used for KOP
   191  	Ytls
   192  	Ytextsize
   193  	Yindir
   194  	Ymax // one past the last class; ycover is sized Ymax*Ymax and indexed with Ymax as the row stride
   195  )
   196  
   197  const ( // Ztypes: the zcase of a ytab line; together with the z bytes it determines the instruction bytes laid down.
   198  	Zxxx = iota
   199  	Zlit
   200  	Zlitm_r
   201  	Zlitr_m
   202  	Zlit_m_r
   203  	Z_rp
   204  	Zbr
   205  	Zcall
   206  	Zcallcon
   207  	Zcallduff
   208  	Zcallind
   209  	Zcallindreg
   210  	Zib_
   211  	Zib_rp
   212  	Zibo_m // opcode byte z[0], then the encoded addressing mode, then a single immediate byte
   213  	Zibo_m_xm
   214  	Zil_
   215  	Zil_rp
   216  	Ziq_rp
   217  	Zilo_m // like Zibo_m, but with a long (32-bit) immediate
   218  	Zjmp
   219  	Zjmpcon
   220  	Zloop
   221  	Zo_iw
   222  	Zm_o
   223  	Zm_r
   224  	Z_m_r
   225  	Zm2_r
   226  	Zm_r_xm
   227  	Zm_r_i_xm
   228  	Zm_r_xm_nr
   229  	Zr_m_xm_nr
   230  	Zibm_r // mmx1,mmx2/mem64,imm8
   231  	Zibr_m
   232  	Zmb_r
   233  	Zaut_r
   234  	Zo_m
   235  	Zo_m64
   236  	Zpseudo
   237  	Zr_m
   238  	Zr_m_xm
   239  	Zrp_
   240  	Z_ib
   241  	Z_il
   242  	Zm_ibo
   243  	Zm_ilo
   244  	Zib_rr
   245  	Zil_rr
   246  	Zbyte
   247  
   248  	Zvex_rm_v_r
   249  	Zvex_rm_v_ro
   250  	Zvex_r_v_rm
   251  	Zvex_i_rm_vo
   252  	Zvex_v_rm_r
   253  	Zvex_i_rm_r
   254  	Zvex_i_r_v
   255  	Zvex_i_rm_v_r
   256  	Zvex
   257  	Zvex_rm_r_vo
   258  	Zvex_i_r_rm
   259  	Zvex_hr_rm_v_r
   260  
   261  	Zevex_first // sentinel placed just before the Zevex_* cases
   262  	Zevex_i_r_k_rm
   263  	Zevex_i_r_rm
   264  	Zevex_i_rm_k_r
   265  	Zevex_i_rm_k_vo
   266  	Zevex_i_rm_r
   267  	Zevex_i_rm_v_k_r
   268  	Zevex_i_rm_v_r
   269  	Zevex_i_rm_vo
   270  	Zevex_k_rmo
   271  	Zevex_r_k_rm
   272  	Zevex_r_v_k_rm
   273  	Zevex_r_v_rm
   274  	Zevex_rm_k_r
   275  	Zevex_rm_v_k_r
   276  	Zevex_rm_v_r
   277  	Zevex_last // sentinel placed just after the Zevex_* cases
   278  
   279  	Zmax // one past the last Ztype
   280  )
   281  
   282  const ( // P*: prefix selectors used as the prefix field of optab lines (a few lines, e.g. ACMPPD, also put one inside opBytes); Rx*: single-bit masks.
   283  	Px   = 0
   284  	Px1  = 1    // symbolic; exact value doesn't matter
   285  	P32  = 0x32 // 32-bit only
   286  	Pe   = 0x66 // operand escape
   287  	Pm   = 0x0f // 2byte opcode escape
   288  	Pq   = 0xff // both escapes: 66 0f
   289  	Pb   = 0xfe // byte operands
   290  	Pf2  = 0xf2 // xmm escape 1: f2 0f
   291  	Pf3  = 0xf3 // xmm escape 2: f3 0f
   292  	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   293  	Pq3  = 0x67 // xmm escape 3: 66 48 0f
   294  	Pq4  = 0x68 // xmm escape 4: 66 0F 38
   295  	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
   296  	Pq5  = 0x6a // xmm escape 5: F3 0F 38
   297  	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
   298  	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   299  	Pw   = 0x48 // Rex.w
   300  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   301  	Py   = 0x80 // defaults to 64-bit mode
   302  	Py1  = 0x81 // symbolic; exact value doesn't matter
   303  	Py3  = 0x83 // symbolic; exact value doesn't matter
   304  	Pavx = 0x84 // symbolic; exact value doesn't matter
   305  
   306  	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   307  	Rxw     = 1 << 3 // =1, 64-bit operand size
   308  	Rxr     = 1 << 2 // extend modrm reg
   309  	Rxx     = 1 << 1 // extend sib index
   310  	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   311  )
   312  
   313  const (
   314  	// Encoding for VEX prefix in tables.
   315  	// The P, L, and W fields are chosen to match
   316  	// their eventual locations in the VEX prefix bytes.
   317  
   322  	// Using a spare bit to make the leading [E]VEX encoding byte different from
   323  	// 0x0f even if all other VEX fields are 0.
   324  	avxEscape = 1 << 6
   325  
   326  	// P field - 2 bits
   327  	vex66 = 1 << 0
   328  	vexF3 = 2 << 0
   329  	vexF2 = 3 << 0
   330  	// L field - 1 bit; vexLZ, vexLIG, and vex128 all encode as 0
   331  	vexLZ  = 0 << 2
   332  	vexLIG = 0 << 2
   333  	vex128 = 0 << 2
   334  	vex256 = 1 << 2
   335  	// W field - 1 bit; vexWIG and vexW0 both encode as 0
   336  	vexWIG = 0 << 7
   337  	vexW0  = 0 << 7
   338  	vexW1  = 1 << 7
   339  	// M field - 5 bits, but mostly reserved; we can store up to 3
   340  	vex0F   = 1 << 3
   341  	vex0F38 = 2 << 3
   342  	vex0F3A = 3 << 3
   343  )
   344  
   345  var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] = 1 means an operand of class a also counts as class b; set up in instinit.
   346  
   347  var reg [MAXREG]int // NOTE(review): one int per register number (sized by MAXREG); has no initializer here — confirm meaning where it is filled in.
   348  
   349  var regrex [MAXREG + 1]int // NOTE(review): one slot more than reg; presumably per-register REX bits, going by the name — confirm.
   350  
   351  var ynone = []ytab{ // Single form with an empty argList (no operands); used by e.g. AAAA, ACBW, ACLC.
   352  	{Zlit, 1, argList{}},
   353  }
   354  
   355  var ytext = []ytab{ // Two Zpseudo forms: (Ymb, Ytextsize), and the same with a Yi32 operand in between.
   356  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   357  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   358  }
   359  
   360  var ynop = []ytab{ // Zpseudo forms taking no operand or a single operand.
   361  	{Zpseudo, 0, argList{}},
   362  	{Zpseudo, 0, argList{Yiauto}},
   363  	{Zpseudo, 0, argList{Yml}},
   364  	{Zpseudo, 0, argList{Yrf}},
   365  	{Zpseudo, 0, argList{Yxr}},
   366  	{Zpseudo, 0, argList{Yiauto}}, // NOTE(review): this and the next three lines repeat the argLists of the four lines above; if the scan stops at the first match they are never selected — confirm before pruning.
   367  	{Zpseudo, 0, argList{Yml}},
   368  	{Zpseudo, 0, argList{Yrf}},
   369  	{Zpseudo, 1, argList{Yxr}}, // the only line here with a non-zero zoffset
   370  }
   371  
   372  var yfuncdata = []ytab{ // Single Zpseudo form: (Yi32, Ym), with zoffset 0.
   373  	{Zpseudo, 0, argList{Yi32, Ym}},
   374  }
   375  
   376  var ypcdata = []ytab{ // Single Zpseudo form: (Yi32, Yi32), with zoffset 0.
   377  	{Zpseudo, 0, argList{Yi32, Yi32}},
   378  }
   379  
   380  var yxorb = []ytab{ // Four forms; zoffsets total 1+2+1+1 = 5, matching the 5 bytes on the AADCB, AADDB, and AANDB lines.
   381  	{Zib_, 1, argList{Yi32, Yal}},
   382  	{Zibo_m, 2, argList{Yi32, Ymb}},
   383  	{Zr_m, 1, argList{Yrb, Ymb}},
   384  	{Zm_r, 1, argList{Ymb, Yrb}},
   385  }
   386  
   387  var yaddl = []ytab{ // Five forms; zoffsets total 2+1+2+1+1 = 7, matching the 7 bytes on e.g. the AADDL line. The Yi8 line comes before the Yi32 ones.
   388  	{Zibo_m, 2, argList{Yi8, Yml}},
   389  	{Zil_, 1, argList{Yi32, Yax}},
   390  	{Zilo_m, 2, argList{Yi32, Yml}},
   391  	{Zr_m, 1, argList{Yrl, Yml}},
   392  	{Zm_r, 1, argList{Yml, Yrl}},
   393  }
   394  
   395  var yincl = []ytab{ // One operand: Yrl via Z_rp, otherwise Yml via Zo_m.
   396  	{Z_rp, 1, argList{Yrl}},
   397  	{Zo_m, 2, argList{Yml}},
   398  }
   399  
   400  var yincq = []ytab{ // One Yml operand via Zo_m; unlike yincl there is no Z_rp line.
   401  	{Zo_m, 2, argList{Yml}},
   402  }
   403  
   404  var ycmpb = []ytab{ // Like yxorb, but the immediate is the second operand (Z_ib, Zm_ibo); zoffsets total 5; used by ACMPB.
   405  	{Z_ib, 1, argList{Yal, Yi32}},
   406  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   407  	{Zm_r, 1, argList{Ymb, Yrb}},
   408  	{Zr_m, 1, argList{Yrb, Ymb}},
   409  }
   410  
   411  var ycmpl = []ytab{ // Like yaddl, but the immediate is the second operand (Zm_ibo, Z_il, Zm_ilo); zoffsets total 7; used by ACMPL and ACMPQ.
   412  	{Zm_ibo, 2, argList{Yml, Yi8}},
   413  	{Z_il, 1, argList{Yax, Yi32}},
   414  	{Zm_ilo, 2, argList{Yml, Yi32}},
   415  	{Zm_r, 1, argList{Yml, Yrl}},
   416  	{Zr_m, 1, argList{Yrl, Yml}},
   417  }
   418  
   419  var yshb = []ytab{ // Ymb second operand; first operand is $1 (Yi1), a Yu8 immediate, or Ycx. zoffset 2 on every line.
   420  	{Zo_m, 2, argList{Yi1, Ymb}},
   421  	{Zibo_m, 2, argList{Yu8, Ymb}},
   422  	{Zo_m, 2, argList{Ycx, Ymb}},
   423  }
   424  
   425  var yshl = []ytab{ // Same shape as yshb with Yml as second operand, plus a Ycl line ahead of the Ycx one.
   426  	{Zo_m, 2, argList{Yi1, Yml}},
   427  	{Zibo_m, 2, argList{Yu8, Yml}},
   428  	{Zo_m, 2, argList{Ycl, Yml}},
   429  	{Zo_m, 2, argList{Ycx, Yml}},
   430  }
   431  
   432  var ytestl = []ytab{ // The last four lines of yaddl (no leading Yi8 form); zoffsets total 1+2+1+1 = 5.
   433  	{Zil_, 1, argList{Yi32, Yax}},
   434  	{Zilo_m, 2, argList{Yi32, Yml}},
   435  	{Zr_m, 1, argList{Yrl, Yml}},
   436  	{Zm_r, 1, argList{Yml, Yrl}},
   437  }
   438  
   439  var ymovb = []ytab{ // (Yrb, Ymb) and (Ymb, Yrb), then a Yi32 immediate into Yrb (Zib_rp) or Ymb (Zibo_m).
   440  	{Zr_m, 1, argList{Yrb, Ymb}},
   441  	{Zm_r, 1, argList{Ymb, Yrb}},
   442  	{Zib_rp, 1, argList{Yi32, Yrb}},
   443  	{Zibo_m, 2, argList{Yi32, Ymb}},
   444  }
   445  
   446  var ybtl = []ytab{ // (Yi8, Yml) or (Yrl, Yml); zoffsets total 2+1 = 3, matching the 3 bytes on e.g. the ABTL line.
   447  	{Zibo_m, 2, argList{Yi8, Yml}},
   448  	{Zr_m, 1, argList{Yrl, Yml}},
   449  }
   450  
   451  var ymovw = []ytab{ // (Yrl, Yml), (Yml, Yrl), Yi32 immediates via Zil_rp/Zilo_m, and a final (Yiauto, Yrl) line via Zaut_r.
   452  	{Zr_m, 1, argList{Yrl, Yml}},
   453  	{Zm_r, 1, argList{Yml, Yrl}},
   454  	{Zil_rp, 1, argList{Yi32, Yrl}},
   455  	{Zilo_m, 2, argList{Yi32, Yml}},
   456  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   457  }
   458  
   459  var ymovl = []ytab{ // The five ymovw lines, with four MOVD lines (MMX via Ymr, XMM via Yxr) inserted before the Yiauto one.
   460  	{Zr_m, 1, argList{Yrl, Yml}},
   461  	{Zm_r, 1, argList{Yml, Yrl}},
   462  	{Zil_rp, 1, argList{Yi32, Yrl}},
   463  	{Zilo_m, 2, argList{Yi32, Yml}},
   464  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   465  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   466  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   467  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   468  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   469  }
   470  
   471  var yret = []ytab{ // Zo_iw with no operand, or with a single Yi32 operand.
   472  	{Zo_iw, 1, argList{}},
   473  	{Zo_iw, 1, argList{Yi32}},
   474  }
   475  
   476  var ymovq = []ytab{ // Fifteen forms in two groups (see the markers inside); the per-line notes give the prefix and opcode bytes.
   477  	// valid in 32-bit mode
   478  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   479  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   480  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   481  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   482  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   483  
   484  	// valid only in 64-bit mode, usually with 64-bit prefix
   485  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   486  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   487  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   488  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   489  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   490  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   491  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   492  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   493  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   494  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   495  }
   496  
   497  var ymovbe = []ytab{ // (Ym, Yrl) via Zlitm_r and (Yrl, Ym) via Zlitr_m; zoffset 3 on each line.
   498  	{Zlitm_r, 3, argList{Ym, Yrl}},
   499  	{Zlitr_m, 3, argList{Yrl, Ym}},
   500  }
   501  
   502  var ym_rl = []ytab{ // Single form: (Ym, Yrl) via Zm_r.
   503  	{Zm_r, 1, argList{Ym, Yrl}},
   504  }
   505  
   506  var yrl_m = []ytab{ // Single form: (Yrl, Ym) via Zr_m; used by ABOUNDL and ABOUNDW.
   507  	{Zr_m, 1, argList{Yrl, Ym}},
   508  }
   509  
   510  var ymb_rl = []ytab{ // Single form: (Ymb, Yrl) via Zmb_r.
   511  	{Zmb_r, 1, argList{Ymb, Yrl}},
   512  }
   513  
   514  var yml_rl = []ytab{ // Single form: (Yml, Yrl) via Zm_r; used by e.g. the ABSF*, ABSR*, and ACMOV* lines.
   515  	{Zm_r, 1, argList{Yml, Yrl}},
   516  }
   517  
   518  var yrl_ml = []ytab{ // Single form: (Yrl, Yml) via Zr_m; used by AARPL.
   519  	{Zr_m, 1, argList{Yrl, Yml}},
   520  }
   521  
   522  var yml_mb = []ytab{ // (Yrb, Ymb) and (Ymb, Yrb). NOTE(review): despite the name, no Yml class appears here — confirm the name is historical.
   523  	{Zr_m, 1, argList{Yrb, Ymb}},
   524  	{Zm_r, 1, argList{Ymb, Yrb}},
   525  }
   526  
   527  var yrb_mb = []ytab{ // Single form: (Yrb, Ymb) via Zr_m.
   528  	{Zr_m, 1, argList{Yrb, Ymb}},
   529  }
   530  
   531  var yxchg = []ytab{ // Lines with Yax on either side come first (Z_rp, Zrp_), then the general (Yrl, Yml) and (Yml, Yrl) lines.
   532  	{Z_rp, 1, argList{Yax, Yrl}},
   533  	{Zrp_, 1, argList{Yrl, Yax}},
   534  	{Zr_m, 1, argList{Yrl, Yml}},
   535  	{Zm_r, 1, argList{Yml, Yrl}},
   536  }
   537  
   538  var ydivl = []ytab{ // Single Yml operand via Zm_o, zoffset 2.
   539  	{Zm_o, 2, argList{Yml}},
   540  }
   541  
   542  var ydivb = []ytab{ // Single Ymb operand via Zm_o, zoffset 2.
   543  	{Zm_o, 2, argList{Ymb}},
   544  }
   545  
   546  var yimul = []ytab{ // A one-operand Yml line, Yi8 and Yi32 immediates into Yrl, then (Yml, Yrl); zoffsets total 2+1+1+2 = 6.
   547  	{Zm_o, 2, argList{Yml}},
   548  	{Zib_rr, 1, argList{Yi8, Yrl}},
   549  	{Zil_rr, 1, argList{Yi32, Yrl}},
   550  	{Zm_r, 2, argList{Yml, Yrl}},
   551  }
   552  
   553  var yimul3 = []ytab{ // Three-operand lines (immediate, Yml, Yrl); the Yi8 line is listed before the Yi32 one.
   554  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   555  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   556  }
   557  
   558  var ybyte = []ytab{ // Single Yi64 operand via Zbyte; used by ABYTE.
   559  	{Zbyte, 1, argList{Yi64}},
   560  }
   561  
   562  var yin = []ytab{ // A Yi32 operand via Zib_, or no operand via Zlit.
   563  	{Zib_, 1, argList{Yi32}},
   564  	{Zlit, 1, argList{}},
   565  }
   566  
   567  var yint = []ytab{ // Single Yi32 operand via Zib_.
   568  	{Zib_, 1, argList{Yi32}},
   569  }
   570  
   571  var ypushl = []ytab{ // One operand: Yrl, Ym, Yi8, or Yi32 (Yi8 listed before Yi32); zoffsets total 1+2+1+1 = 5.
   572  	{Zrp_, 1, argList{Yrl}},
   573  	{Zm_o, 2, argList{Ym}},
   574  	{Zib_, 1, argList{Yi8}},
   575  	{Zil_, 1, argList{Yi32}},
   576  }
   577  
   578  var ypopl = []ytab{ // One operand: Yrl via Z_rp, or Ym via Zo_m.
   579  	{Z_rp, 1, argList{Yrl}},
   580  	{Zo_m, 2, argList{Ym}},
   581  }
   582  
   583  var ywrfsbase = []ytab{ // Single Yrl operand via Zm_o (yrdrand takes the same operand via Zo_m).
   584  	{Zm_o, 2, argList{Yrl}},
   585  }
   586  
   587  var yrdrand = []ytab{ // Single Yrl operand via Zo_m (ywrfsbase takes the same operand via Zm_o).
   588  	{Zo_m, 2, argList{Yrl}},
   589  }
   590  
   591  var yclflush = []ytab{ // Single Ym operand via Zo_m, zoffset 2; used by ACLDEMOTE, ACLFLUSH, ACLFLUSHOPT, and ACLWB.
   592  	{Zo_m, 2, argList{Ym}},
   593  }
   594  
   595  var ybswap = []ytab{ // Single Yrl operand via Z_rp, zoffset 2; used by ABSWAPL and ABSWAPQ.
   596  	{Z_rp, 2, argList{Yrl}},
   597  }
   598  
   599  var yscond = []ytab{ // Single Ymb operand via Zo_m, zoffset 2.
   600  	{Zo_m, 2, argList{Ymb}},
   601  }
   602  
   603  var yjcond = []ytab{ // Zbr lines: a bare Ybr target, or a Ybr target preceded by $0 (Yi0) or $1 (Yi1). Only the last line has a non-zero zoffset.
   604  	{Zbr, 0, argList{Ybr}},
   605  	{Zbr, 0, argList{Yi0, Ybr}},
   606  	{Zbr, 1, argList{Yi1, Ybr}},
   607  }
   608  
   609  var yloop = []ytab{ // Single Ybr operand via Zloop.
   610  	{Zloop, 1, argList{Ybr}},
   611  }
   612  
   613  var ycall = []ytab{ // Five forms; zoffsets total 0+2+2+0+1 = 5, matching the 5 bytes on the obj.ACALL line. A zoffset-0 line leaves z where the following line also starts.
   614  	{Zcallindreg, 0, argList{Yml}},
   615  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   616  	{Zcallind, 2, argList{Yindir}},
   617  	{Zcall, 0, argList{Ybr}},
   618  	{Zcallcon, 1, argList{Yi32}},
   619  }
   620  
   621  var yduff = []ytab{ // Single Yi32 operand via Zcallduff.
   622  	{Zcallduff, 1, argList{Yi32}},
   623  }
   624  
   625  var yjmp = []ytab{ // Targets: Yml (Zo_m64), Ybr (Zjmp), or Yi32 (Zjmpcon). Zjmp's zoffset of 0 leaves z where the Zjmpcon line also starts.
   626  	{Zo_m64, 2, argList{Yml}},
   627  	{Zjmp, 0, argList{Ybr}},
   628  	{Zjmpcon, 1, argList{Yi32}},
   629  }
   630  
   631  var yfmvd = []ytab{ // Ym or Yrf paired with Yf0, in both operand orders; zoffset 2 on every line.
   632  	{Zm_o, 2, argList{Ym, Yf0}},
   633  	{Zo_m, 2, argList{Yf0, Ym}},
   634  	{Zm_o, 2, argList{Yrf, Yf0}},
   635  	{Zo_m, 2, argList{Yf0, Yrf}},
   636  }
   637  
   638  var yfmvdp = []ytab{ // Only the Yf0-first lines of yfmvd: (Yf0, Ym) and (Yf0, Yrf).
   639  	{Zo_m, 2, argList{Yf0, Ym}},
   640  	{Zo_m, 2, argList{Yf0, Yrf}},
   641  }
   642  
   643  var yfmvf = []ytab{ // Only the Ym lines of yfmvd: (Ym, Yf0) and (Yf0, Ym).
   644  	{Zm_o, 2, argList{Ym, Yf0}},
   645  	{Zo_m, 2, argList{Yf0, Ym}},
   646  }
   647  
   648  var yfmvx = []ytab{ // Single form: (Ym, Yf0) via Zm_o.
   649  	{Zm_o, 2, argList{Ym, Yf0}},
   650  }
   651  
   652  var yfmvp = []ytab{ // Single form: (Yf0, Ym) via Zo_m.
   653  	{Zo_m, 2, argList{Yf0, Ym}},
   654  }
   655  
   656  var yfcmv = []ytab{ // Single form: (Yrf, Yf0) via Zm_o.
   657  	{Zm_o, 2, argList{Yrf, Yf0}},
   658  }
   659  
   660  var yfadd = []ytab{ // (Ym, Yf0), (Yrf, Yf0), and (Yf0, Yrf); there is no (Yf0, Ym) line.
   661  	{Zm_o, 2, argList{Ym, Yf0}},
   662  	{Zm_o, 2, argList{Yrf, Yf0}},
   663  	{Zo_m, 2, argList{Yf0, Yrf}},
   664  }
   665  
   666  var yfxch = []ytab{ // Yf0 and Yrf, accepted in either operand order.
   667  	{Zo_m, 2, argList{Yf0, Yrf}},
   668  	{Zm_o, 2, argList{Yrf, Yf0}},
   669  }
   670  
   671  var ycompp = []ytab{ // Single form: (Yf0, Yrf) via Zo_m; see the caveat on the line itself.
   672  	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   673  }
   674  
   675  var ystsw = []ytab{ // One operand: Ym via Zo_m, or Yax via Zlit.
   676  	{Zo_m, 2, argList{Ym}},
   677  	{Zlit, 1, argList{Yax}},
   678  }
   679  
   680  var ysvrs_mo = []ytab{ // Single Ym operand via Zm_o, zoffset 2.
   681  	{Zm_o, 2, argList{Ym}},
   682  }
   683  
   684  // unaryDst version of "ysvrs_mo": the same single Ym operand, but via Zo_m instead of Zm_o.
   685  var ysvrs_om = []ytab{
   686  	{Zo_m, 2, argList{Ym}},
   687  }
   688  
   689  var ymm = []ytab{ // (Ymm, Ymr) with zoffset 1, or (Yxm, Yxr) with zoffset 2; both via Zm_r_xm.
   690  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   691  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   692  }
   693  
   694  var yxm = []ytab{ // Single form: (Yxm, Yxr) via Zm_r_xm; used by e.g. AADDPD, AADDSD, and AANDPS.
   695  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   696  }
   697  
   698  var yxm_q4 = []ytab{ // Same operands as yxm, but via Zm_r rather than Zm_r_xm.
   699  	{Zm_r, 1, argList{Yxm, Yxr}},
   700  }
   701  
   702  var yxcvm1 = []ytab{ // (Yxm, Yxr) or (Yxm, Ymr); zoffset 2 on each line.
   703  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   704  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   705  }
   706  
   707  var yxcvm2 = []ytab{ // (Yxm, Yxr) or (Ymm, Yxr); zoffset 2 on each line.
   708  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   709  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   710  }
   711  
   712  var yxr = []ytab{ // Single form: both operands Yxr (X0..X15), via Zm_r_xm.
   713  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   714  }
   715  
   716  var yxr_ml = []ytab{ // Single form: (Yxr, Yml) via Zr_m_xm.
   717  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   718  }
   719  
   720  var ymr = []ytab{ // Single form: both operands Ymr, via Zm_r.
   721  	{Zm_r, 1, argList{Ymr, Ymr}},
   722  }
   723  
   724  var ymr_ml = []ytab{ // Single form: (Ymr, Yml) via Zr_m_xm.
   725  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   726  }
   727  
   728  var yxcmpi = []ytab{ // Three operands with the Yi8 immediate last; zoffset 2; used by ACMPPD, ACMPPS, ACMPSD, and ACMPSS.
   729  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   730  }
   731  
   732  var yxmov = []ytab{ // (Yxm, Yxr) via Zm_r_xm and (Yxr, Yxm) via Zr_m_xm.
   733  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   734  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   735  }
   736  
   737  var yxcvfl = []ytab{ // Single form: (Yxm, Yrl) with zoffset 1; yxcvfq is the same line with zoffset 2.
   738  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   739  }
   740  
   741  var yxcvlf = []ytab{ // Single form: (Yml, Yxr) with zoffset 1; yxcvqf is the same line with zoffset 2.
   742  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   743  }
   744  
   745  var yxcvfq = []ytab{ // Single form: (Yxm, Yrl) with zoffset 2; yxcvfl is the same line with zoffset 1.
   746  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   747  }
   748  
   749  var yxcvqf = []ytab{ // Single form: (Yml, Yxr) with zoffset 2; yxcvlf is the same line with zoffset 1.
   750  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   751  }
   752  
   753  var yps = []ytab{ // (Ymm, Ymr) and (Yxm, Yxr) lines, each followed by a Yi8-immediate line via Zibo_m_xm; zoffsets total 1+2+2+3 = 8.
   754  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   755  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   756  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   757  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   758  }
   759  
   760  var yxrrl = []ytab{ // Single form: (Yxr, Yrl) via Zm_r.
   761  	{Zm_r, 1, argList{Yxr, Yrl}},
   762  }
   763  
   764  var ymrxr = []ytab{ // (Ymr, Yxr) via Zm_r, or (Yxm, Yxr) via Zm_r_xm.
   765  	{Zm_r, 1, argList{Ymr, Yxr}},
   766  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   767  }
   768  
   769  var ymshuf = []ytab{ // Single three-operand form: (Yi8, Ymm, Ymr) via Zibm_r.
   770  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   771  }
   772  
   773  var ymshufb = []ytab{ // Single form: (Yxm, Yxr) via Zm2_r, zoffset 2.
   774  	{Zm2_r, 2, argList{Yxm, Yxr}},
   775  }
   776  
   777  // yxshuf should never have more than 1 entry,
   778  // because some optab entries have opcode sequences that
   779  // are longer than 2 bytes (zoffset=2 here);
   780  // ROUNDPD, ROUNDPS, and the recently added BLENDPD,
   781  // to name a few.
   782  var yxshuf = []ytab{
   783  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   784  }
   785  
   786  var yextrw = []ytab{ // Yu8 immediate and Yxr, with a Yrl third operand (Zibm_r) or a Yml one (Zibr_m).
   787  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   788  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   789  }
   790  
   791  var yextr = []ytab{ // Single form: (Yu8, Yxr, Ymm) via Zibr_m, zoffset 3.
   792  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   793  }
   794  
   795  var yinsrw = []ytab{ // Single form: (Yu8, Yml, Yxr) via Zibm_r, zoffset 2.
   796  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   797  }
   798  
   799  var yinsr = []ytab{ // Single form: (Yu8, Ymm, Yxr) via Zibm_r, zoffset 3.
   800  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   801  }
   802  
   803  var ypsdq = []ytab{ // Single form: (Yi8, Yxr) via Zibo_m, zoffset 2.
   804  	{Zibo_m, 2, argList{Yi8, Yxr}},
   805  }
   806  
   807  var ymskb = []ytab{ // (Yxr, Yrl) with zoffset 2, or (Ymr, Yrl) with zoffset 1; both via Zm_r_xm.
   808  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   809  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   810  }
   811  
   812  var ycrc32l = []ytab{ // Single form: (Yml, Yrl) via Zlitm_r, zoffset 0.
   813  	{Zlitm_r, 0, argList{Yml, Yrl}},
   814  }
   815  
   816  var ycrc32b = []ytab{ // Single form: (Ymb, Yrl) via Zlitm_r, zoffset 0.
   817  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   818  }
   819  
   820  var yprefetch = []ytab{ // Single Ym operand via Zm_o, zoffset 2.
   821  	{Zm_o, 2, argList{Ym}},
   822  }
   823  
   824  var yaes = []ytab{ // Single form: (Yxm, Yxr) via Zlitm_r, zoffset 2.
   825  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   826  }
   827  
   828  var yxbegin = []ytab{ // Single Ybr operand via Zjmp with zoffset 1 (the Zjmp line in yjmp uses 0).
   829  	{Zjmp, 1, argList{Ybr}},
   830  }
   831  
   832  var yxabort = []ytab{ // Single Yu8 operand (fits in uint8) via Zib_.
   833  	{Zib_, 1, argList{Yu8}},
   834  }
   835  
   836  var ylddqu = []ytab{ // Single form: (Ym, Yxr) via Zm_r.
   837  	{Zm_r, 1, argList{Ym, Yxr}},
   838  }
   839  
   840  var ypalignr = []ytab{ // Single form: (Yu8, Yxm, Yxr) via Zibm_r — the same line as the one in yxshuf.
   841  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   842  }
   843  
   844  var ysha256rnds2 = []ytab{ // Single form whose first operand must be X0 (Yxr0), followed by Yxm and Yxr; Zlit_m_r, zoffset 0.
   845  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   846  }
   847  
   848  var yblendvpd = []ytab{ // Same operands as ysha256rnds2 (X0 first, then Yxm, Yxr), but via Z_m_r with zoffset 1.
   849  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   850  }
   851  
   852  var ymmxmm0f38 = []ytab{ // (Ymm, Ymr) with zoffset 3, or (Yxm, Yxr) with zoffset 5; both via Zlitm_r.
   853  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   854  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   855  }
   856  
   857  var yextractps = []ytab{ // Single form: Yu2 immediate (fits in uint2), Yxr, Yml; via Zibr_m.
   858  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   859  }
   860  
   861  var ysha1rnds4 = []ytab{ // Single form: Yu2 immediate (fits in uint2), Yxm, Yxr; via Zibm_r.
   862  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   863  }
   864  
   865  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   866  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   867  // to find the entry with the given p.As and then looks through the ytable for
   868  // that instruction (the second field in the optab struct) for a line whose
   869  // argList matches the Ytypes of the p.From and p.To operands.  The
   870  // function oclass computes the specific Ytype of an operand and then the set
   871  // of more general Ytypes that it satisfies is implied by the ycover table, set
   872  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   873  // from the more general 8-bit constants, but instinit says
   874  //
   875  //	ycover[Yi0*Ymax+Ys32] = 1
   876  //	ycover[Yi1*Ymax+Ys32] = 1
   877  //	ycover[Yi8*Ymax+Ys32] = 1
   878  //
   879  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   880  // if that's what an instruction can handle.
   881  //
   882  // In parallel with the scan through the ytable for the appropriate line, there
   883  // is a z pointer that starts out pointing at the strange magic byte list in
   884  // the Optab struct.  With each step past a non-matching ytable line, z
   885  // advances by the 2nd entry in the line.  When a matching line is found, that
   886  // z pointer has the extra data to use in laying down the instruction bytes.
   887  // The actual bytes laid down are a function of the 1st entry in the line (that
   888  // is, the Ztype) and the z bytes.
   889  //
   890  // For example, let's look at AADDL.  The optab line says:
   891  //
   892  //	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   893  //
   894  // and yaddl says
   895  //
   896  //	var yaddl = []ytab{
   897  //	        {Zibo_m, 2, argList{Yi8, Yml}},
   898  //	        {Zil_, 1, argList{Yi32, Yax}},
   899  //	        {Zilo_m, 2, argList{Yi32, Yml}},
   900  //	        {Zr_m, 1, argList{Yrl, Yml}},
   901  //	        {Zm_r, 1, argList{Yml, Yrl}},
   902  //	}
   903  //
   904  // so there are 5 possible types of ADDL instruction that can be laid down, and
   905  // possible states used to lay them down (Ztype and z pointer, assuming z
   906  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   907  //
   908  //	Yi8, Yml -> Zibo_m, z (0x83, 00)
   909  //	Yi32, Yax -> Zil_, z+2 (0x05)
   910  //	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   911  //	Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   912  //	Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   913  //
   914  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   915  // relatively straightforward as this program goes.
   916  //
   917  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   918  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   919  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   920  // Zilo_m is the same but a long (32-bit) immediate.
   921  var optab =
   922  // as, ytab, prefix, op
   923  [...]Optab{
   924  	{obj.AXXX, nil, 0, opBytes{}},
   925  	{AAAA, ynone, P32, opBytes{0x37}},
   926  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   927  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   928  	{AAAS, ynone, P32, opBytes{0x3f}},
   929  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   930  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   933  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   934  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   935  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   936  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   937  	{AADDPD, yxm, Pq, opBytes{0x58}},
   938  	{AADDPS, yxm, Pm, opBytes{0x58}},
   939  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   940  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   941  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   942  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   943  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   944  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   945  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   946  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   947  	{AADJSP, nil, 0, opBytes{}},
   948  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   949  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   950  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   951  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   952  	{AANDPD, yxm, Pq, opBytes{0x54}},
   953  	{AANDPS, yxm, Pm, opBytes{0x54}},
   954  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   956  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   957  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   958  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   959  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   960  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   961  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   962  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   963  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   964  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   965  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   966  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   967  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   968  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   969  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   970  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   971  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   972  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   973  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   974  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   975  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   976  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   977  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   978  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   979  	{ABYTE, ybyte, Px, opBytes{1}},
   980  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   981  	{ACBW, ynone, Pe, opBytes{0x98}},
   982  	{ACDQ, ynone, Px, opBytes{0x99}},
   983  	{ACDQE, ynone, Pw, opBytes{0x98}},
   984  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   985  	{ACLC, ynone, Px, opBytes{0xf8}},
   986  	{ACLD, ynone, Px, opBytes{0xfc}},
   987  	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
   988  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   989  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   990  	{ACLI, ynone, Px, opBytes{0xfa}},
   991  	{ACLTS, ynone, Pm, opBytes{0x06}},
   992  	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
   993  	{ACMC, ynone, Px, opBytes{0xf5}},
   994  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   995  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   996  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   997  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   998  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   999  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
  1000  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  1001  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1002  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1003  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1004  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1005  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1006  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1007  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1008  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1009  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1010  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1011  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1012  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1013  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1014  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1015  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1016  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1017  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1018  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1019  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1020  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1021  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1022  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1023  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1024  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1025  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1026  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1027  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1028  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1029  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1030  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1031  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1032  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1033  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1034  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1035  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1036  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1037  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1038  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1039  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1040  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1041  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1042  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1043  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1044  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1045  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1046  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1047  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1048  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1049  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1050  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1051  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1052  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1053  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1054  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1055  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1056  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1057  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1058  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1059  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1060  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1061  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1062  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1063  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1064  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1065  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1066  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1067  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1068  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1069  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1070  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1071  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1072  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1073  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1074  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1075  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1076  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1077  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1078  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1079  	{ACWD, ynone, Pe, opBytes{0x99}},
  1080  	{ACWDE, ynone, Px, opBytes{0x98}},
  1081  	{ACQO, ynone, Pw, opBytes{0x99}},
  1082  	{ADAA, ynone, P32, opBytes{0x27}},
  1083  	{ADAS, ynone, P32, opBytes{0x2f}},
  1084  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1085  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1086  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1087  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1088  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1089  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1090  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1091  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1092  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1093  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1094  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1095  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1096  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1097  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1098  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1099  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1100  	{AENTER, nil, 0, opBytes{}}, // botch
  1101  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1102  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1103  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1104  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1105  	{AHLT, ynone, Px, opBytes{0xf4}},
  1106  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1107  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1108  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1109  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1110  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1111  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1112  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1113  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1114  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1115  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1116  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1117  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1118  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1119  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1120  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1121  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1122  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1123  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1124  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1125  	{AINSL, ynone, Px, opBytes{0x6d}},
  1126  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1127  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1128  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1129  	{AINT, yint, Px, opBytes{0xcd}},
  1130  	{AINTO, ynone, P32, opBytes{0xce}},
  1131  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1132  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1133  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1134  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1135  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1136  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1137  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1138  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1139  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1140  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1141  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1142  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1143  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1144  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1145  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1146  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1147  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1148  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1149  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1150  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1151  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1152  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1153  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1154  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1155  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1156  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1157  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1158  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1159  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1160  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1161  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1162  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1163  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1164  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1165  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1166  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1167  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1168  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1169  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1170  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1171  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1172  	{ALODSL, ynone, Px, opBytes{0xad}},
  1173  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1174  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1175  	{ALONG, ybyte, Px, opBytes{4}},
  1176  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1177  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1178  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1179  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1180  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1181  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1182  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1183  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1184  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1185  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1186  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1187  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1188  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1189  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1190  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1191  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1192  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1193  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1194  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1195  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1196  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1197  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1198  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1199  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1200  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1201  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1202  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1203  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1204  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1205  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1206  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1207  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1208  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1209  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1210  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1211  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1212  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1213  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1214  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1215  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1216  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1217  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1218  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1219  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1220  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1221  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1222  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1223  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1224  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1225  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1226  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1227  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1228  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1229  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1230  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1231  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1232  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1233  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1234  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1235  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1236  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1237  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1238  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1239  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1240  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1241  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1242  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1243  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1244  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1245  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1246  	{AMULPS, yxm, Ym, opBytes{0x59}},
  1247  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1248  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1249  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1250  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1251  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1252  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1253  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1254  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1255  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1256  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1257  	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1258  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1259  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1260  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1261  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1262  	{AORPD, yxm, Pq, opBytes{0x56}},
  1263  	{AORPS, yxm, Pm, opBytes{0x56}},
  1264  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1265  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1266  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1267  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1268  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1269  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1270  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1271  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1272  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1273  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1274  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1275  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1276  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1277  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1278  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1279  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1280  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1281  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1282  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1283  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1284  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1285  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1286  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1287  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1288  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1289  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1290  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1291  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1292  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1293  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1294  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1295  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1296  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1297  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1298  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1299  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1300  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1301  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1302  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1303  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1304  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1305  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1306  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1307  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1308  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1309  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1310  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1311  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1312  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1313  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1314  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1315  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1316  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1317  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1318  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1319  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1320  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1321  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1322  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1323  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1324  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1325  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1326  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1327  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1328  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1329  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1330  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1331  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1332  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1333  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1334  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1335  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1336  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1337  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1338  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1339  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1340  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1341  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1342  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1343  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1344  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1345  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1346  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1347  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1348  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1349  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1350  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1351  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1352  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1353  	{APOPAL, ynone, P32, opBytes{0x61}},
  1354  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1355  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1356  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1357  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1358  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1359  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1360  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1361  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1362  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1363  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1364  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1365  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1366  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1367  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1368  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1369  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1370  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1371  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1372  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1373  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1374  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1375  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1376  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1377  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1378  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1379  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1380  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1381  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1382  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1383  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1384  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1385  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1386  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1387  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1388  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1389  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1390  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1391  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1392  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1393  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1394  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1395  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1396  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1397  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1398  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1399  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1400  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1401  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1402  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1403  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1404  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1405  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1406  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1407  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1408  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1409  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1410  	{AQUAD, ybyte, Px, opBytes{8}},
  1411  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1412  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1413  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1414  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1415  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1416  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1417  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1418  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1419  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1420  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1421  	{AREP, ynone, Px, opBytes{0xf3}},
  1422  	{AREPN, ynone, Px, opBytes{0xf2}},
  1423  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1424  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1425  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1426  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1427  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1428  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1429  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1430  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1431  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1432  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1433  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1434  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1435  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1436  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1437  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1438  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1439  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1440  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1441  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1442  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1443  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1444  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1445  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1446  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1447  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1448  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1449  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1450  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1451  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1452  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1453  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1454  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1455  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1456  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1457  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1458  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1459  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1460  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1461  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1462  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1463  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1464  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1465  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1466  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1467  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1468  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1469  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1470  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1471  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1472  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1473  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1474  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1475  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1476  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1477  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1478  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1479  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1480  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1481  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1482  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1483  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1484  	{ASTC, ynone, Px, opBytes{0xf9}},
  1485  	{ASTD, ynone, Px, opBytes{0xfd}},
  1486  	{ASTI, ynone, Px, opBytes{0xfb}},
  1487  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1488  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1489  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1490  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1491  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1492  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1493  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1494  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1495  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1496  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1497  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1498  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1499  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1500  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1501  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1502  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1503  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1504  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1505  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1506  	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1507  	{obj.ATEXT, ytext, Px, opBytes{}},
  1508  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1509  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1510  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1511  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1512  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1513  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1514  	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1515  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1516  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1517  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1518  	{AWORD, ybyte, Px, opBytes{2}},
  1519  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1520  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1521  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1522  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1523  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1524  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1525  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1526  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1527  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1528  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1529  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1530  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1531  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1532  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1533  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1534  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1535  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1536  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1537  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1538  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1539  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1540  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1541  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1542  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1543  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1544  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1545  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1546  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1547  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1548  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1549  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1550  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1551  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1552  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1553  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1554  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1555  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1556  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1557  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1558  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1559  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1560  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1561  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1562  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1563  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1564  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1565  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1566  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1567  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1568  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1569  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1570  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1571  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1572  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1573  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1574  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1575  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1576  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1577  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1578  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1579  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1580  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1581  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1582  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1583  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1584  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1585  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1586  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1587  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1588  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1589  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1590  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1591  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1592  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1593  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1594  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1595  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1596  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1597  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1598  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1599  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1600  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1601  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1602  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1603  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1604  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1605  	{AFFREE, nil, 0, opBytes{}},
  1606  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1607  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1608  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1609  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1610  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1611  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1612  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1613  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1614  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1615  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1616  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1617  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1618  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1619  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1620  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1621  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1622  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1623  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1624  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1625  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1626  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1627  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1628  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1629  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1630  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1631  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1632  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1633  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1634  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1635  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1636  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1637  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1638  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1639  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1640  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1641  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1642  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1643  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1644  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1645  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1646  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1647  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1648  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1649  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1650  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1651  	{AINVD, ynone, Pm, opBytes{0x08}},
  1652  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1653  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1654  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1655  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1656  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1657  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1658  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1659  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1660  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1661  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1662  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1663  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1664  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1665  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1666  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1667  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1668  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1669  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1670  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1671  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1672  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1673  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1674  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1675  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1676  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1677  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1678  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1679  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1680  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1681  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1682  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1683  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1684  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1685  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1686  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1687  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1688  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1689  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1690  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1691  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1692  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1693  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1694  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1695  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1696  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1697  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1698  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1699  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1700  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1701  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1702  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1703  	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1704  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1705  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1706  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1707  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1708  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1709  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1710  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1711  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1712  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1713  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1714  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1715  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1716  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1717  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1718  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1719  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1720  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1721  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1722  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1723  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1724  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1725  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1726  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1727  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1728  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1729  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1730  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1731  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1732  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1733  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1734  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1735  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1736  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1737  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1738  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1739  	{AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1740  	{AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1741  	{AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1742  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1743  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1744  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1745  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1746  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1747  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1748  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1749  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1750  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1751  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1752  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1753  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1754  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1755  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1756  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1757  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1758  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1759  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1760  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1761  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1762  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1763  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1764  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1765  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1766  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1767  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1768  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1769  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1770  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1771  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1772  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1773  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1774  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1775  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1776  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1777  	{ARDPID, yrdrand, Pf3, opBytes{0xc7, 07}},
  1778  
  1779  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1780  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1781  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1782  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1783  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1784  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1785  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1786  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1787  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1788  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1789  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1790  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1791  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1792  
  1793  	{obj.AEND, nil, 0, opBytes{}},
  1794  	{0, nil, 0, opBytes{}},
  1795  }
  1796  
// opindex maps an instruction opcode, masked with obj.AMask, to its Optab
// entry. It is populated by instinit from avxOptab and optab; opcodes with no
// table entry stay nil.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1798  
  1799  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1800  // This happens on systems like Solaris that call .so functions instead of system calls.
  1801  // It does not seem to be necessary for any other systems. This is probably working
  1802  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1803  // what that bug is. And this does fix it.
  1804  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1805  	if ctxt.Headtype == objabi.Hsolaris {
  1806  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1807  		return strings.HasPrefix(s.Name, "libc_")
  1808  	}
  1809  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1810  }
  1811  
  1812  // single-instruction no-ops of various lengths.
  1813  // constructed by hand and disassembled with gdb to verify.
  1814  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1815  var nop = [][16]uint8{
  1816  	{0x90},
  1817  	{0x66, 0x90},
  1818  	{0x0F, 0x1F, 0x00},
  1819  	{0x0F, 0x1F, 0x40, 0x00},
  1820  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1821  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1822  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1823  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1824  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1825  }
  1826  
  1827  // Native Client rejects the repeated 0x66 prefix.
  1828  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1829  func fillnop(p []byte, n int) {
  1830  	var m int
  1831  
  1832  	for n > 0 {
  1833  		m = n
  1834  		if m > len(nop) {
  1835  			m = len(nop)
  1836  		}
  1837  		copy(p[:m], nop[m-1][:m])
  1838  		p = p[m:]
  1839  		n -= m
  1840  	}
  1841  }
  1842  
  1843  func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1844  	s.Grow(int64(c) + int64(pad))
  1845  	fillnop(s.P[c:], int(pad))
  1846  	return c + pad
  1847  }
  1848  
  1849  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1850  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1851  		return l
  1852  	}
  1853  	return q
  1854  }
  1855  
  1856  // isJump returns whether p is a jump instruction.
  1857  // It is used to ensure that no standalone or macro-fused jump will straddle
  1858  // or end on a 32 byte boundary by inserting NOPs before the jumps.
  1859  func isJump(p *obj.Prog) bool {
  1860  	return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
  1861  		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
  1862  }
  1863  
  1864  // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
  1865  // jump. Otherwise, nil is returned.
  1866  func lookForJCC(p *obj.Prog) *obj.Prog {
  1867  	// Skip any PCDATA, FUNCDATA or NOP instructions
  1868  	var q *obj.Prog
  1869  	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
  1870  	}
  1871  
  1872  	if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
  1873  		return nil
  1874  	}
  1875  
  1876  	switch q.As {
  1877  	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
  1878  		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
  1879  	default:
  1880  		return nil
  1881  	}
  1882  
  1883  	return q
  1884  }
  1885  
  1886  // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
  1887  // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
  1888  // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
  1889  func fusedJump(p *obj.Prog) (bool, uint8) {
  1890  	var fusedSize uint8
  1891  
  1892  	// The first instruction in a macro fused pair may be preceded by the LOCK prefix,
  1893  	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
  1894  	// need to be careful to insert any padding before the locks rather than directly after them.
  1895  
  1896  	if p.As == AXRELEASE || p.As == AXACQUIRE {
  1897  		fusedSize += p.Isize
  1898  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1899  		}
  1900  		if p == nil {
  1901  			return false, 0
  1902  		}
  1903  	}
  1904  	if p.As == ALOCK {
  1905  		fusedSize += p.Isize
  1906  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1907  		}
  1908  		if p == nil {
  1909  			return false, 0
  1910  		}
  1911  	}
  1912  	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
  1913  
  1914  	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
  1915  		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
  1916  
  1917  	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
  1918  		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
  1919  
  1920  	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
  1921  		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
  1922  
  1923  	if !cmpAddSub && !testAnd && !incDec {
  1924  		return false, 0
  1925  	}
  1926  
  1927  	if !incDec {
  1928  		var argOne obj.AddrType
  1929  		var argTwo obj.AddrType
  1930  		if cmp {
  1931  			argOne = p.From.Type
  1932  			argTwo = p.To.Type
  1933  		} else {
  1934  			argOne = p.To.Type
  1935  			argTwo = p.From.Type
  1936  		}
  1937  		if argOne == obj.TYPE_REG {
  1938  			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
  1939  				return false, 0
  1940  			}
  1941  		} else if argOne == obj.TYPE_MEM {
  1942  			if argTwo != obj.TYPE_REG {
  1943  				return false, 0
  1944  			}
  1945  		} else {
  1946  			return false, 0
  1947  		}
  1948  	}
  1949  
  1950  	fusedSize += p.Isize
  1951  	jmp := lookForJCC(p)
  1952  	if jmp == nil {
  1953  		return false, 0
  1954  	}
  1955  
  1956  	fusedSize += jmp.Isize
  1957  
  1958  	if testAnd {
  1959  		return true, fusedSize
  1960  	}
  1961  
  1962  	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
  1963  		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
  1964  		return false, 0
  1965  	}
  1966  
  1967  	if cmpAddSub {
  1968  		return true, fusedSize
  1969  	}
  1970  
  1971  	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
  1972  		return false, 0
  1973  	}
  1974  
  1975  	return true, fusedSize
  1976  }
  1977  
// padJumpsCtx is the size in bytes of the boundary that padJump keeps jumps
// from crossing or ending on. A value of 0 disables jump padding.
type padJumpsCtx int32
  1979  
  1980  func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
  1981  	// Disable jump padding on 32 bit builds by setting
  1982  	// padJumps to 0.
  1983  	if ctxt.Arch.Family == sys.I386 {
  1984  		return padJumpsCtx(0)
  1985  	}
  1986  
  1987  	// Disable jump padding for hand written assembly code.
  1988  	if ctxt.IsAsm {
  1989  		return padJumpsCtx(0)
  1990  	}
  1991  
  1992  	return padJumpsCtx(32)
  1993  }
  1994  
  1995  // padJump detects whether the instruction being assembled is a standalone or a macro-fused
  1996  // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
  1997  // not cross or end on a 32 byte boundary.
  1998  func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
  1999  	if pjc == 0 {
  2000  		return c
  2001  	}
  2002  
  2003  	var toPad int32
  2004  	fj, fjSize := fusedJump(p)
  2005  	mask := int32(pjc - 1)
  2006  	if fj {
  2007  		if (c&mask)+int32(fjSize) >= int32(pjc) {
  2008  			toPad = int32(pjc) - (c & mask)
  2009  		}
  2010  	} else if isJump(p) {
  2011  		if (c&mask)+int32(p.Isize) >= int32(pjc) {
  2012  			toPad = int32(pjc) - (c & mask)
  2013  		}
  2014  	}
  2015  	if toPad <= 0 {
  2016  		return c
  2017  	}
  2018  
  2019  	return noppad(ctxt, s, c, toPad)
  2020  }
  2021  
  2022  // reAssemble is called if an instruction's size changes during assembly. If
  2023  // it does and the instruction is a standalone or a macro-fused jump we need to
  2024  // reassemble.
  2025  func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
  2026  	if pjc == 0 {
  2027  		return false
  2028  	}
  2029  
  2030  	fj, _ := fusedJump(p)
  2031  	return fj || isJump(p)
  2032  }
  2033  
// nopPad records a run of padding bytes inserted during assembly.
// span6 collects these and, once assembly has settled, splices an ANOP Prog
// of size n into the instruction list right after p.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}
  2038  
  2039  // requireAlignment ensures that the function alignment is at
  2040  // least as high as a, which should be a power of two
  2041  // and between 8 and 2048, inclusive.
  2042  //
  2043  // the boolean result indicates whether the alignment meets those constraints
  2044  func requireAlignment(a int64, ctxt *obj.Link, cursym *obj.LSym) bool {
  2045  	if !((a&(a-1) == 0) && 8 <= a && a <= 2048) {
  2046  		ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a)
  2047  		return false
  2048  	}
  2049  	// By default function alignment is 32 bytes for amd64
  2050  	if cursym.Func().Align < int32(a) {
  2051  		cursym.Func().Align = int32(a)
  2052  	}
  2053  	return true
  2054  }
  2055  
// span6 assembles the instructions of the function symbol s into s.P,
// assigning each Prog its Pc, and sets s.Size to the resulting length.
//
// The whole function is assembled repeatedly until a pass finishes without
// requesting re-assembly; a pass requests it when a short forward branch turns
// out to need the long form, or when the size of a (possibly macro-fused) jump
// changes while jump padding is enabled. Afterwards any padding that was
// inserted is spliced into the Prog list as ANOPs, unsafe points are marked
// for the two-instruction TLS sequence, and jump table entries are written.
//
// span6 returns immediately if s.P is already non-nil. newprog is only passed
// on to obj.MarkUnsafePoints.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
		ctxt.Diag("-spectre=ret not supported on 386")
		ctxt.Retpoline = false // don't keep printing
	}

	pjc := makePjcCtx(ctxt)

	// s already has code bytes; nothing to do.
	if s.P != nil {
		return
	}

	// instinit sets ycover[0]; a zero here means it has not run.
	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// Pre-pass: give target-less branches a target, lower AADJSP to ADD/SUB
	// on SP, and redirect indirect calls/jumps to retpoline thunks.
	for p := s.Func().Text; p != nil; p = p.Link {
		// A branch without a target is made to point at itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			// Only register-indirect targets can be rewritten.
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			// Turn the indirect branch into a direct branch to the
			// runtime.retpoline<REG> symbol for the target register.
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	var count int64 // rough count of number of instructions
	for p := s.Func().Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already carries branchShort has been visited by this
		// loop (or is p itself), so presumably the branch goes backwards:
		// mark p as a backward branch and its target as a loop head.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes performed so far
	var c int32 // current offset into s.P
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	nrelocs0 := len(s.R)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Clear and drop the relocations added since the loop was entered.
		for i := range s.R[nrelocs0:] {
			s.R[nrelocs0+i] = obj.Reloc{}
		}
		s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func().Text; p != nil; p = p.Link {
			// c0 is the offset before any padding is inserted for p.
			c0 := c
			c = pjc.padJump(ctxt, s, p, c)

			if p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX {
				// Alignment directives emit only NOP padding, v bytes of it.
				v := obj.AlignmentPadding(c, p, ctxt, s)
				if v > 0 {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
				}
				p.Pc = int64(c)
				c += int32(v)
				pPrev = p
				continue

			}

			// Align loop heads to loopAlign, but only when that costs at most
			// maxLoopPad bytes of padding.
			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// v is the displacement from the end of q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					// Too far for the short form: clear the short flag on q
					// and request another pass.
					if v > 127 {
						reAssemble = true
						q.Back ^= branchShort
					}

					// The one-byte displacement sits at offset 2 for
					// JCXZL and XBEGIN and at offset 1 otherwise.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement occupies the last
					// four bytes of q.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		// Give up if the passes never settle.
		n++
		if n > 1000 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Stop if this call has reported new errors.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging aid: hex dump of the assembled bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
	}

	// Now that we know byte offsets, we can generate jump table entries.
	// TODO: could this live in obj instead of obj/$ARCH?
	for _, jt := range s.Func().JumpTables {
		for i, p := range jt.Targets {
			// The ith jumptable entry points to the p.Pc'th
			// byte in the function symbol s.
			jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
		}
	}
}
  2275  
// instinit initializes the package's lookup tables: opindex (opcode to Optab
// entry), ycover (relations between operand classes) and reg/regrex
// (per-register encoding data). On Plan 9 it also looks up the _privates
// symbol. Calling it again after the tables are set up is a no-op.
func instinit(ctxt *obj.Link) {
	// The diagonal loop below sets ycover[0], so a non-zero value means the
	// tables have been filled in.
	if ycover[0] != 0 {
		// Already initialized; stop now.
		// This happens in the cmd/asm tests,
		// each of which re-initializes the arch.
		return
	}

	switch ctxt.Headtype {
	case objabi.Hplan9:
		plan9privates = ctxt.Lookup("_privates")
	}

	// Index both opcode tables by masked opcode. An opcode that already has
	// an entry is reported as a phase error.
	for i := range avxOptab {
		c := avxOptab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &avxOptab[i]
	}
	// optab[0] is skipped; the scan stops at the first entry whose as is 0.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// ycover is a Ymax-by-Ymax matrix stored flat. An entry
	// ycover[a*Ymax+b] = 1 presumably means an operand of class a is
	// accepted where class b is expected (confirm against the operand
	// matcher). Every class covers itself.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	// Immediate classes.
	ycover[Yi0*Ymax+Yu2] = 1
	ycover[Yi1*Ymax+Yu2] = 1

	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu2*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1
	ycover[Yu2*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu2*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu2*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu2*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu2*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	// General-purpose register classes.
	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	// Register-or-memory classes.
	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	// Vector register and vector register-or-memory classes.
	ycover[Yxr0*Ymax+Yxr] = 1

	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr0*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	ycover[Yxr0*Ymax+YxrEvex] = 1
	ycover[Yxr*Ymax+YxrEvex] = 1

	ycover[Ym*Ymax+YxmEvex] = 1
	ycover[Yxr0*Ymax+YxmEvex] = 1
	ycover[Yxr*Ymax+YxmEvex] = 1
	ycover[YxrEvex*Ymax+YxmEvex] = 1

	ycover[Yyr*Ymax+YyrEvex] = 1

	ycover[Ym*Ymax+YymEvex] = 1
	ycover[Yyr*Ymax+YymEvex] = 1
	ycover[YyrEvex*Ymax+YymEvex] = 1

	ycover[Ym*Ymax+Yzm] = 1
	ycover[Yzr*Ymax+Yzm] = 1

	// Mask register classes.
	ycover[Yk0*Ymax+Yk] = 1
	ycover[Yknot0*Ymax+Yk] = 1

	ycover[Yk0*Ymax+Ykm] = 1
	ycover[Yknot0*Ymax+Ykm] = 1
	ycover[Yk*Ymax+Ykm] = 1
	ycover[Ym*Ymax+Ykm] = 1

	ycover[Yxvm*Ymax+YxvmEvex] = 1

	ycover[Yyvm*Ymax+YyvmEvex] = 1

	// Fill in the per-register tables. reg[i] is the register's position
	// within its group modulo 8, or -1 when i falls in none of the ranges
	// below. regrex[i] holds extra bits for registers outside the first
	// eight of their group (presumably REX/EVEX extension bits; confirm
	// against the encoder).
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			if i >= REG_SPB && i <= REG_DIB {
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// AH..BH are numbered from 4.
		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_K0 && i <= REG_K0+7 {
			reg[i] = (i - REG_K0) & 7
		}
		// X, Y and Z registers: the upper half of each block of 16 gets
		// Rxr|Rxx|Rxb, and registers 16-31 additionally get RxrEvex.
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_X16 && i <= REG_X16+15 {
			reg[i] = (i - REG_X16) & 7
			if i >= REG_X16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y16 && i <= REG_Y16+15 {
			reg[i] = (i - REG_Y16) & 7
			if i >= REG_Y16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Z0 && i <= REG_Z0+15 {
			reg[i] = (i - REG_Z0) & 7
			if i > REG_Z0+7 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Z16 && i <= REG_Z16+15 {
			reg[i] = (i - REG_Z16) & 7
			if i >= REG_Z16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}

		// The registers at REG_CR+8 through REG_CR+15 only get Rxr; reg[i]
		// is not set for them here.
		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}
  2509  
// isAndroid records whether buildcfg.GOOS is "android". The TLS handling in
// prefixof and vaddr consults it when picking a segment prefix or relocation.
var isAndroid = buildcfg.GOOS == "android"
  2511  
  2512  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2513  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2514  		return 0
  2515  	}
  2516  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2517  		switch a.Reg {
  2518  		case REG_CS:
  2519  			return 0x2e
  2520  
  2521  		case REG_DS:
  2522  			return 0x3e
  2523  
  2524  		case REG_ES:
  2525  			return 0x26
  2526  
  2527  		case REG_FS:
  2528  			return 0x64
  2529  
  2530  		case REG_GS:
  2531  			return 0x65
  2532  
  2533  		case REG_TLS:
  2534  			// NOTE: Systems listed here should be only systems that
  2535  			// support direct TLS references like 8(TLS) implemented as
  2536  			// direct references from FS or GS. Systems that require
  2537  			// the initial-exec model, where you load the TLS base into
  2538  			// a register and then index from that register, do not reach
  2539  			// this code and should not be listed.
  2540  			if ctxt.Arch.Family == sys.I386 {
  2541  				switch ctxt.Headtype {
  2542  				default:
  2543  					if isAndroid {
  2544  						return 0x65 // GS
  2545  					}
  2546  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2547  
  2548  				case objabi.Hdarwin,
  2549  					objabi.Hdragonfly,
  2550  					objabi.Hfreebsd,
  2551  					objabi.Hnetbsd,
  2552  					objabi.Hopenbsd:
  2553  					return 0x65 // GS
  2554  				}
  2555  			}
  2556  
  2557  			switch ctxt.Headtype {
  2558  			default:
  2559  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2560  
  2561  			case objabi.Hlinux:
  2562  				if isAndroid {
  2563  					return 0x64 // FS
  2564  				}
  2565  
  2566  				if ctxt.Flag_shared {
  2567  					log.Fatalf("unknown TLS base register for linux with -shared")
  2568  				} else {
  2569  					return 0x64 // FS
  2570  				}
  2571  
  2572  			case objabi.Hdragonfly,
  2573  				objabi.Hfreebsd,
  2574  				objabi.Hnetbsd,
  2575  				objabi.Hopenbsd,
  2576  				objabi.Hsolaris:
  2577  				return 0x64 // FS
  2578  
  2579  			case objabi.Hdarwin:
  2580  				return 0x65 // GS
  2581  			}
  2582  		}
  2583  	}
  2584  
  2585  	switch a.Index {
  2586  	case REG_CS:
  2587  		return 0x2e
  2588  
  2589  	case REG_DS:
  2590  		return 0x3e
  2591  
  2592  	case REG_ES:
  2593  		return 0x26
  2594  
  2595  	case REG_TLS:
  2596  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2597  			// When building for inclusion into a shared library, an instruction of the form
  2598  			//     MOV off(CX)(TLS*1), AX
  2599  			// becomes
  2600  			//     mov %gs:off(%ecx), %eax // on i386
  2601  			//     mov %fs:off(%rcx), %rax // on amd64
  2602  			// which assumes that the correct TLS offset has been loaded into CX (today
  2603  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2604  			// a shared library the instruction it becomes
  2605  			//     mov 0x0(%ecx), %eax // on i386
  2606  			//     mov 0x0(%rcx), %rax // on amd64
  2607  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2608  			if ctxt.Arch.Family == sys.I386 {
  2609  				return 0x65 // GS
  2610  			}
  2611  			return 0x64 // FS
  2612  		}
  2613  
  2614  	case REG_FS:
  2615  		return 0x64
  2616  
  2617  	case REG_GS:
  2618  		return 0x65
  2619  	}
  2620  
  2621  	return 0
  2622  }
  2623  
  2624  // oclassRegList returns multisource operand class for addr.
  2625  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2626  	// TODO(quasilyte): when oclass register case is refactored into
  2627  	// lookup table, use it here to get register kind more easily.
  2628  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2629  
  2630  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2631  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2632  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2633  
  2634  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2635  	low := regIndex(int16(reg0))
  2636  	high := regIndex(int16(reg1))
  2637  
  2638  	if ctxt.Arch.Family == sys.I386 {
  2639  		if low >= 8 || high >= 8 {
  2640  			return Yxxx
  2641  		}
  2642  	}
  2643  
  2644  	switch high - low {
  2645  	case 3:
  2646  		switch {
  2647  		case regIsXmm(reg0) && regIsXmm(reg1):
  2648  			return YxrEvexMulti4
  2649  		case regIsYmm(reg0) && regIsYmm(reg1):
  2650  			return YyrEvexMulti4
  2651  		case regIsZmm(reg0) && regIsZmm(reg1):
  2652  			return YzrMulti4
  2653  		default:
  2654  			return Yxxx
  2655  		}
  2656  	default:
  2657  		return Yxxx
  2658  	}
  2659  }
  2660  
  2661  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2662  // For addr that is not V-mem returns (Yxxx, false).
  2663  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2664  	switch addr.Index {
  2665  	case REG_X0 + 0,
  2666  		REG_X0 + 1,
  2667  		REG_X0 + 2,
  2668  		REG_X0 + 3,
  2669  		REG_X0 + 4,
  2670  		REG_X0 + 5,
  2671  		REG_X0 + 6,
  2672  		REG_X0 + 7:
  2673  		return Yxvm, true
  2674  	case REG_X8 + 0,
  2675  		REG_X8 + 1,
  2676  		REG_X8 + 2,
  2677  		REG_X8 + 3,
  2678  		REG_X8 + 4,
  2679  		REG_X8 + 5,
  2680  		REG_X8 + 6,
  2681  		REG_X8 + 7:
  2682  		if ctxt.Arch.Family == sys.I386 {
  2683  			return Yxxx, true
  2684  		}
  2685  		return Yxvm, true
  2686  	case REG_X16 + 0,
  2687  		REG_X16 + 1,
  2688  		REG_X16 + 2,
  2689  		REG_X16 + 3,
  2690  		REG_X16 + 4,
  2691  		REG_X16 + 5,
  2692  		REG_X16 + 6,
  2693  		REG_X16 + 7,
  2694  		REG_X16 + 8,
  2695  		REG_X16 + 9,
  2696  		REG_X16 + 10,
  2697  		REG_X16 + 11,
  2698  		REG_X16 + 12,
  2699  		REG_X16 + 13,
  2700  		REG_X16 + 14,
  2701  		REG_X16 + 15:
  2702  		if ctxt.Arch.Family == sys.I386 {
  2703  			return Yxxx, true
  2704  		}
  2705  		return YxvmEvex, true
  2706  
  2707  	case REG_Y0 + 0,
  2708  		REG_Y0 + 1,
  2709  		REG_Y0 + 2,
  2710  		REG_Y0 + 3,
  2711  		REG_Y0 + 4,
  2712  		REG_Y0 + 5,
  2713  		REG_Y0 + 6,
  2714  		REG_Y0 + 7:
  2715  		return Yyvm, true
  2716  	case REG_Y8 + 0,
  2717  		REG_Y8 + 1,
  2718  		REG_Y8 + 2,
  2719  		REG_Y8 + 3,
  2720  		REG_Y8 + 4,
  2721  		REG_Y8 + 5,
  2722  		REG_Y8 + 6,
  2723  		REG_Y8 + 7:
  2724  		if ctxt.Arch.Family == sys.I386 {
  2725  			return Yxxx, true
  2726  		}
  2727  		return Yyvm, true
  2728  	case REG_Y16 + 0,
  2729  		REG_Y16 + 1,
  2730  		REG_Y16 + 2,
  2731  		REG_Y16 + 3,
  2732  		REG_Y16 + 4,
  2733  		REG_Y16 + 5,
  2734  		REG_Y16 + 6,
  2735  		REG_Y16 + 7,
  2736  		REG_Y16 + 8,
  2737  		REG_Y16 + 9,
  2738  		REG_Y16 + 10,
  2739  		REG_Y16 + 11,
  2740  		REG_Y16 + 12,
  2741  		REG_Y16 + 13,
  2742  		REG_Y16 + 14,
  2743  		REG_Y16 + 15:
  2744  		if ctxt.Arch.Family == sys.I386 {
  2745  			return Yxxx, true
  2746  		}
  2747  		return YyvmEvex, true
  2748  
  2749  	case REG_Z0 + 0,
  2750  		REG_Z0 + 1,
  2751  		REG_Z0 + 2,
  2752  		REG_Z0 + 3,
  2753  		REG_Z0 + 4,
  2754  		REG_Z0 + 5,
  2755  		REG_Z0 + 6,
  2756  		REG_Z0 + 7:
  2757  		return Yzvm, true
  2758  	case REG_Z8 + 0,
  2759  		REG_Z8 + 1,
  2760  		REG_Z8 + 2,
  2761  		REG_Z8 + 3,
  2762  		REG_Z8 + 4,
  2763  		REG_Z8 + 5,
  2764  		REG_Z8 + 6,
  2765  		REG_Z8 + 7,
  2766  		REG_Z8 + 8,
  2767  		REG_Z8 + 9,
  2768  		REG_Z8 + 10,
  2769  		REG_Z8 + 11,
  2770  		REG_Z8 + 12,
  2771  		REG_Z8 + 13,
  2772  		REG_Z8 + 14,
  2773  		REG_Z8 + 15,
  2774  		REG_Z8 + 16,
  2775  		REG_Z8 + 17,
  2776  		REG_Z8 + 18,
  2777  		REG_Z8 + 19,
  2778  		REG_Z8 + 20,
  2779  		REG_Z8 + 21,
  2780  		REG_Z8 + 22,
  2781  		REG_Z8 + 23:
  2782  		if ctxt.Arch.Family == sys.I386 {
  2783  			return Yxxx, true
  2784  		}
  2785  		return Yzvm, true
  2786  	}
  2787  
  2788  	return Yxxx, false
  2789  }
  2790  
  2791  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2792  	switch a.Type {
  2793  	case obj.TYPE_REGLIST:
  2794  		return oclassRegList(ctxt, a)
  2795  
  2796  	case obj.TYPE_NONE:
  2797  		return Ynone
  2798  
  2799  	case obj.TYPE_BRANCH:
  2800  		return Ybr
  2801  
  2802  	case obj.TYPE_INDIR:
  2803  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2804  			return Yindir
  2805  		}
  2806  		return Yxxx
  2807  
  2808  	case obj.TYPE_MEM:
  2809  		// Pseudo registers have negative index, but SP is
  2810  		// not pseudo on x86, hence REG_SP check is not redundant.
  2811  		if a.Index == REG_SP || a.Index < 0 {
  2812  			// Can't use FP/SB/PC/SP as the index register.
  2813  			return Yxxx
  2814  		}
  2815  
  2816  		if vmem, ok := oclassVMem(ctxt, a); ok {
  2817  			return vmem
  2818  		}
  2819  
  2820  		if ctxt.Arch.Family == sys.AMD64 {
  2821  			switch a.Name {
  2822  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2823  				// Global variables can't use index registers and their
  2824  				// base register is %rip (%rip is encoded as REG_NONE).
  2825  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2826  					return Yxxx
  2827  				}
  2828  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2829  				// These names must have a base of SP.  The old compiler
  2830  				// uses 0 for the base register. SSA uses REG_SP.
  2831  				if a.Reg != REG_SP && a.Reg != 0 {
  2832  					return Yxxx
  2833  				}
  2834  			case obj.NAME_NONE:
  2835  				// everything is ok
  2836  			default:
  2837  				// unknown name
  2838  				return Yxxx
  2839  			}
  2840  		}
  2841  		return Ym
  2842  
  2843  	case obj.TYPE_ADDR:
  2844  		switch a.Name {
  2845  		case obj.NAME_GOTREF:
  2846  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2847  			return Yxxx
  2848  
  2849  		case obj.NAME_EXTERN,
  2850  			obj.NAME_STATIC:
  2851  			if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2852  				return Yi32
  2853  			}
  2854  			return Yiauto // use pc-relative addressing
  2855  
  2856  		case obj.NAME_AUTO,
  2857  			obj.NAME_PARAM:
  2858  			return Yiauto
  2859  		}
  2860  
  2861  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2862  		// and got Yi32 in an earlier version of this code.
  2863  		// Keep doing that until we fix yduff etc.
  2864  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2865  			return Yi32
  2866  		}
  2867  
  2868  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2869  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2870  		}
  2871  		fallthrough
  2872  
  2873  	case obj.TYPE_CONST:
  2874  		if a.Sym != nil {
  2875  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2876  		}
  2877  
  2878  		v := a.Offset
  2879  		if ctxt.Arch.Family == sys.I386 {
  2880  			v = int64(int32(v))
  2881  		}
  2882  		switch {
  2883  		case v == 0:
  2884  			return Yi0
  2885  		case v == 1:
  2886  			return Yi1
  2887  		case v >= 0 && v <= 3:
  2888  			return Yu2
  2889  		case v >= 0 && v <= 127:
  2890  			return Yu7
  2891  		case v >= 0 && v <= 255:
  2892  			return Yu8
  2893  		case v >= -128 && v <= 127:
  2894  			return Yi8
  2895  		}
  2896  		if ctxt.Arch.Family == sys.I386 {
  2897  			return Yi32
  2898  		}
  2899  		l := int32(v)
  2900  		if int64(l) == v {
  2901  			return Ys32 // can sign extend
  2902  		}
  2903  		if v>>32 == 0 {
  2904  			return Yi32 // unsigned
  2905  		}
  2906  		return Yi64
  2907  
  2908  	case obj.TYPE_TEXTSIZE:
  2909  		return Ytextsize
  2910  	}
  2911  
  2912  	if a.Type != obj.TYPE_REG {
  2913  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2914  		return Yxxx
  2915  	}
  2916  
  2917  	switch a.Reg {
  2918  	case REG_AL:
  2919  		return Yal
  2920  
  2921  	case REG_AX:
  2922  		return Yax
  2923  
  2924  		/*
  2925  			case REG_SPB:
  2926  		*/
  2927  	case REG_BPB,
  2928  		REG_SIB,
  2929  		REG_DIB,
  2930  		REG_R8B,
  2931  		REG_R9B,
  2932  		REG_R10B,
  2933  		REG_R11B,
  2934  		REG_R12B,
  2935  		REG_R13B,
  2936  		REG_R14B,
  2937  		REG_R15B:
  2938  		if ctxt.Arch.Family == sys.I386 {
  2939  			return Yxxx
  2940  		}
  2941  		fallthrough
  2942  
  2943  	case REG_DL,
  2944  		REG_BL,
  2945  		REG_AH,
  2946  		REG_CH,
  2947  		REG_DH,
  2948  		REG_BH:
  2949  		return Yrb
  2950  
  2951  	case REG_CL:
  2952  		return Ycl
  2953  
  2954  	case REG_CX:
  2955  		return Ycx
  2956  
  2957  	case REG_DX, REG_BX:
  2958  		return Yrx
  2959  
  2960  	case REG_R8, // not really Yrl
  2961  		REG_R9,
  2962  		REG_R10,
  2963  		REG_R11,
  2964  		REG_R12,
  2965  		REG_R13,
  2966  		REG_R14,
  2967  		REG_R15:
  2968  		if ctxt.Arch.Family == sys.I386 {
  2969  			return Yxxx
  2970  		}
  2971  		fallthrough
  2972  
  2973  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2974  		if ctxt.Arch.Family == sys.I386 {
  2975  			return Yrl32
  2976  		}
  2977  		return Yrl
  2978  
  2979  	case REG_F0 + 0:
  2980  		return Yf0
  2981  
  2982  	case REG_F0 + 1,
  2983  		REG_F0 + 2,
  2984  		REG_F0 + 3,
  2985  		REG_F0 + 4,
  2986  		REG_F0 + 5,
  2987  		REG_F0 + 6,
  2988  		REG_F0 + 7:
  2989  		return Yrf
  2990  
  2991  	case REG_M0 + 0,
  2992  		REG_M0 + 1,
  2993  		REG_M0 + 2,
  2994  		REG_M0 + 3,
  2995  		REG_M0 + 4,
  2996  		REG_M0 + 5,
  2997  		REG_M0 + 6,
  2998  		REG_M0 + 7:
  2999  		return Ymr
  3000  
  3001  	case REG_X0:
  3002  		return Yxr0
  3003  
  3004  	case REG_X0 + 1,
  3005  		REG_X0 + 2,
  3006  		REG_X0 + 3,
  3007  		REG_X0 + 4,
  3008  		REG_X0 + 5,
  3009  		REG_X0 + 6,
  3010  		REG_X0 + 7,
  3011  		REG_X0 + 8,
  3012  		REG_X0 + 9,
  3013  		REG_X0 + 10,
  3014  		REG_X0 + 11,
  3015  		REG_X0 + 12,
  3016  		REG_X0 + 13,
  3017  		REG_X0 + 14,
  3018  		REG_X0 + 15:
  3019  		return Yxr
  3020  
  3021  	case REG_X0 + 16,
  3022  		REG_X0 + 17,
  3023  		REG_X0 + 18,
  3024  		REG_X0 + 19,
  3025  		REG_X0 + 20,
  3026  		REG_X0 + 21,
  3027  		REG_X0 + 22,
  3028  		REG_X0 + 23,
  3029  		REG_X0 + 24,
  3030  		REG_X0 + 25,
  3031  		REG_X0 + 26,
  3032  		REG_X0 + 27,
  3033  		REG_X0 + 28,
  3034  		REG_X0 + 29,
  3035  		REG_X0 + 30,
  3036  		REG_X0 + 31:
  3037  		return YxrEvex
  3038  
  3039  	case REG_Y0 + 0,
  3040  		REG_Y0 + 1,
  3041  		REG_Y0 + 2,
  3042  		REG_Y0 + 3,
  3043  		REG_Y0 + 4,
  3044  		REG_Y0 + 5,
  3045  		REG_Y0 + 6,
  3046  		REG_Y0 + 7,
  3047  		REG_Y0 + 8,
  3048  		REG_Y0 + 9,
  3049  		REG_Y0 + 10,
  3050  		REG_Y0 + 11,
  3051  		REG_Y0 + 12,
  3052  		REG_Y0 + 13,
  3053  		REG_Y0 + 14,
  3054  		REG_Y0 + 15:
  3055  		return Yyr
  3056  
  3057  	case REG_Y0 + 16,
  3058  		REG_Y0 + 17,
  3059  		REG_Y0 + 18,
  3060  		REG_Y0 + 19,
  3061  		REG_Y0 + 20,
  3062  		REG_Y0 + 21,
  3063  		REG_Y0 + 22,
  3064  		REG_Y0 + 23,
  3065  		REG_Y0 + 24,
  3066  		REG_Y0 + 25,
  3067  		REG_Y0 + 26,
  3068  		REG_Y0 + 27,
  3069  		REG_Y0 + 28,
  3070  		REG_Y0 + 29,
  3071  		REG_Y0 + 30,
  3072  		REG_Y0 + 31:
  3073  		return YyrEvex
  3074  
  3075  	case REG_Z0 + 0,
  3076  		REG_Z0 + 1,
  3077  		REG_Z0 + 2,
  3078  		REG_Z0 + 3,
  3079  		REG_Z0 + 4,
  3080  		REG_Z0 + 5,
  3081  		REG_Z0 + 6,
  3082  		REG_Z0 + 7:
  3083  		return Yzr
  3084  
  3085  	case REG_Z0 + 8,
  3086  		REG_Z0 + 9,
  3087  		REG_Z0 + 10,
  3088  		REG_Z0 + 11,
  3089  		REG_Z0 + 12,
  3090  		REG_Z0 + 13,
  3091  		REG_Z0 + 14,
  3092  		REG_Z0 + 15,
  3093  		REG_Z0 + 16,
  3094  		REG_Z0 + 17,
  3095  		REG_Z0 + 18,
  3096  		REG_Z0 + 19,
  3097  		REG_Z0 + 20,
  3098  		REG_Z0 + 21,
  3099  		REG_Z0 + 22,
  3100  		REG_Z0 + 23,
  3101  		REG_Z0 + 24,
  3102  		REG_Z0 + 25,
  3103  		REG_Z0 + 26,
  3104  		REG_Z0 + 27,
  3105  		REG_Z0 + 28,
  3106  		REG_Z0 + 29,
  3107  		REG_Z0 + 30,
  3108  		REG_Z0 + 31:
  3109  		if ctxt.Arch.Family == sys.I386 {
  3110  			return Yxxx
  3111  		}
  3112  		return Yzr
  3113  
  3114  	case REG_K0:
  3115  		return Yk0
  3116  
  3117  	case REG_K0 + 1,
  3118  		REG_K0 + 2,
  3119  		REG_K0 + 3,
  3120  		REG_K0 + 4,
  3121  		REG_K0 + 5,
  3122  		REG_K0 + 6,
  3123  		REG_K0 + 7:
  3124  		return Yknot0
  3125  
  3126  	case REG_CS:
  3127  		return Ycs
  3128  	case REG_SS:
  3129  		return Yss
  3130  	case REG_DS:
  3131  		return Yds
  3132  	case REG_ES:
  3133  		return Yes
  3134  	case REG_FS:
  3135  		return Yfs
  3136  	case REG_GS:
  3137  		return Ygs
  3138  	case REG_TLS:
  3139  		return Ytls
  3140  
  3141  	case REG_GDTR:
  3142  		return Ygdtr
  3143  	case REG_IDTR:
  3144  		return Yidtr
  3145  	case REG_LDTR:
  3146  		return Yldtr
  3147  	case REG_MSW:
  3148  		return Ymsw
  3149  	case REG_TASK:
  3150  		return Ytask
  3151  
  3152  	case REG_CR + 0:
  3153  		return Ycr0
  3154  	case REG_CR + 1:
  3155  		return Ycr1
  3156  	case REG_CR + 2:
  3157  		return Ycr2
  3158  	case REG_CR + 3:
  3159  		return Ycr3
  3160  	case REG_CR + 4:
  3161  		return Ycr4
  3162  	case REG_CR + 5:
  3163  		return Ycr5
  3164  	case REG_CR + 6:
  3165  		return Ycr6
  3166  	case REG_CR + 7:
  3167  		return Ycr7
  3168  	case REG_CR + 8:
  3169  		return Ycr8
  3170  
  3171  	case REG_DR + 0:
  3172  		return Ydr0
  3173  	case REG_DR + 1:
  3174  		return Ydr1
  3175  	case REG_DR + 2:
  3176  		return Ydr2
  3177  	case REG_DR + 3:
  3178  		return Ydr3
  3179  	case REG_DR + 4:
  3180  		return Ydr4
  3181  	case REG_DR + 5:
  3182  		return Ydr5
  3183  	case REG_DR + 6:
  3184  		return Ydr6
  3185  	case REG_DR + 7:
  3186  		return Ydr7
  3187  
  3188  	case REG_TR + 0:
  3189  		return Ytr0
  3190  	case REG_TR + 1:
  3191  		return Ytr1
  3192  	case REG_TR + 2:
  3193  		return Ytr2
  3194  	case REG_TR + 3:
  3195  		return Ytr3
  3196  	case REG_TR + 4:
  3197  		return Ytr4
  3198  	case REG_TR + 5:
  3199  		return Ytr5
  3200  	case REG_TR + 6:
  3201  		return Ytr6
  3202  	case REG_TR + 7:
  3203  		return Ytr7
  3204  	}
  3205  
  3206  	return Yxxx
  3207  }
  3208  
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
//
// Bytes are written into the fixed-size array buf at offset off. None of the
// methods grow the array: indexed writes past its end panic, and Put drops
// whatever copy cannot fit while still advancing off.
type AsmBuf struct {
	buf      [100]byte // backing storage for the bytes emitted so far
	off      int       // number of valid bytes in buf; also the next write position
	rexflag  int       // REX-related bits OR-ed in while operands are encoded
	vexflag  bool      // Per inst: true for VEX-encoded
	evexflag bool      // Per inst: true for EVEX-encoded
	rep      bool      // NOTE(review): presumably a pending REP prefix; confirm at use sites
	repn     bool      // NOTE(review): presumably a pending REPN prefix; confirm at use sites
	lock     bool      // NOTE(review): presumably a pending LOCK prefix; confirm at use sites

	evex evexBits // Initialized when evexflag is true
}
  3223  
  3224  // Put1 appends one byte to the end of the buffer.
  3225  func (ab *AsmBuf) Put1(x byte) {
  3226  	ab.buf[ab.off] = x
  3227  	ab.off++
  3228  }
  3229  
  3230  // Put2 appends two bytes to the end of the buffer.
  3231  func (ab *AsmBuf) Put2(x, y byte) {
  3232  	ab.buf[ab.off+0] = x
  3233  	ab.buf[ab.off+1] = y
  3234  	ab.off += 2
  3235  }
  3236  
  3237  // Put3 appends three bytes to the end of the buffer.
  3238  func (ab *AsmBuf) Put3(x, y, z byte) {
  3239  	ab.buf[ab.off+0] = x
  3240  	ab.buf[ab.off+1] = y
  3241  	ab.buf[ab.off+2] = z
  3242  	ab.off += 3
  3243  }
  3244  
  3245  // Put4 appends four bytes to the end of the buffer.
  3246  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3247  	ab.buf[ab.off+0] = x
  3248  	ab.buf[ab.off+1] = y
  3249  	ab.buf[ab.off+2] = z
  3250  	ab.buf[ab.off+3] = w
  3251  	ab.off += 4
  3252  }
  3253  
  3254  // PutInt16 writes v into the buffer using little-endian encoding.
  3255  func (ab *AsmBuf) PutInt16(v int16) {
  3256  	ab.buf[ab.off+0] = byte(v)
  3257  	ab.buf[ab.off+1] = byte(v >> 8)
  3258  	ab.off += 2
  3259  }
  3260  
  3261  // PutInt32 writes v into the buffer using little-endian encoding.
  3262  func (ab *AsmBuf) PutInt32(v int32) {
  3263  	ab.buf[ab.off+0] = byte(v)
  3264  	ab.buf[ab.off+1] = byte(v >> 8)
  3265  	ab.buf[ab.off+2] = byte(v >> 16)
  3266  	ab.buf[ab.off+3] = byte(v >> 24)
  3267  	ab.off += 4
  3268  }
  3269  
  3270  // PutInt64 writes v into the buffer using little-endian encoding.
  3271  func (ab *AsmBuf) PutInt64(v int64) {
  3272  	ab.buf[ab.off+0] = byte(v)
  3273  	ab.buf[ab.off+1] = byte(v >> 8)
  3274  	ab.buf[ab.off+2] = byte(v >> 16)
  3275  	ab.buf[ab.off+3] = byte(v >> 24)
  3276  	ab.buf[ab.off+4] = byte(v >> 32)
  3277  	ab.buf[ab.off+5] = byte(v >> 40)
  3278  	ab.buf[ab.off+6] = byte(v >> 48)
  3279  	ab.buf[ab.off+7] = byte(v >> 56)
  3280  	ab.off += 8
  3281  }
  3282  
  3283  // Put copies b into the buffer.
  3284  func (ab *AsmBuf) Put(b []byte) {
  3285  	copy(ab.buf[ab.off:], b)
  3286  	ab.off += len(b)
  3287  }
  3288  
  3289  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3290  // starting at specified offset (e.g. z counter value).
  3291  // Trailing 0 is not written.
  3292  //
  3293  // Intended to be used for literal Z cases.
  3294  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3295  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3296  	for int(op[offset]) != 0 {
  3297  		ab.Put1(byte(op[offset]))
  3298  		offset++
  3299  	}
  3300  }
  3301  
  3302  // Insert inserts b at offset i.
  3303  func (ab *AsmBuf) Insert(i int, b byte) {
  3304  	ab.off++
  3305  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3306  	ab.buf[i] = b
  3307  }
  3308  
  3309  // Last returns the byte at the end of the buffer.
  3310  func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3311  
  3312  // Len returns the length of the buffer.
  3313  func (ab *AsmBuf) Len() int { return ab.off }
  3314  
  3315  // Bytes returns the contents of the buffer.
  3316  func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3317  
  3318  // Reset empties the buffer.
  3319  func (ab *AsmBuf) Reset() { ab.off = 0 }
  3320  
  3321  // At returns the byte at offset i.
  3322  func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3323  
  3324  // asmidx emits SIB byte.
  3325  func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  3326  	var i int
  3327  
  3328  	// X/Y index register is used in VSIB.
  3329  	switch index {
  3330  	default:
  3331  		goto bad
  3332  
  3333  	case REG_NONE:
  3334  		i = 4 << 3
  3335  		goto bas
  3336  
  3337  	case REG_R8,
  3338  		REG_R9,
  3339  		REG_R10,
  3340  		REG_R11,
  3341  		REG_R12,
  3342  		REG_R13,
  3343  		REG_R14,
  3344  		REG_R15,
  3345  		REG_X8,
  3346  		REG_X9,
  3347  		REG_X10,
  3348  		REG_X11,
  3349  		REG_X12,
  3350  		REG_X13,
  3351  		REG_X14,
  3352  		REG_X15,
  3353  		REG_X16,
  3354  		REG_X17,
  3355  		REG_X18,
  3356  		REG_X19,
  3357  		REG_X20,
  3358  		REG_X21,
  3359  		REG_X22,
  3360  		REG_X23,
  3361  		REG_X24,
  3362  		REG_X25,
  3363  		REG_X26,
  3364  		REG_X27,
  3365  		REG_X28,
  3366  		REG_X29,
  3367  		REG_X30,
  3368  		REG_X31,
  3369  		REG_Y8,
  3370  		REG_Y9,
  3371  		REG_Y10,
  3372  		REG_Y11,
  3373  		REG_Y12,
  3374  		REG_Y13,
  3375  		REG_Y14,
  3376  		REG_Y15,
  3377  		REG_Y16,
  3378  		REG_Y17,
  3379  		REG_Y18,
  3380  		REG_Y19,
  3381  		REG_Y20,
  3382  		REG_Y21,
  3383  		REG_Y22,
  3384  		REG_Y23,
  3385  		REG_Y24,
  3386  		REG_Y25,
  3387  		REG_Y26,
  3388  		REG_Y27,
  3389  		REG_Y28,
  3390  		REG_Y29,
  3391  		REG_Y30,
  3392  		REG_Y31,
  3393  		REG_Z8,
  3394  		REG_Z9,
  3395  		REG_Z10,
  3396  		REG_Z11,
  3397  		REG_Z12,
  3398  		REG_Z13,
  3399  		REG_Z14,
  3400  		REG_Z15,
  3401  		REG_Z16,
  3402  		REG_Z17,
  3403  		REG_Z18,
  3404  		REG_Z19,
  3405  		REG_Z20,
  3406  		REG_Z21,
  3407  		REG_Z22,
  3408  		REG_Z23,
  3409  		REG_Z24,
  3410  		REG_Z25,
  3411  		REG_Z26,
  3412  		REG_Z27,
  3413  		REG_Z28,
  3414  		REG_Z29,
  3415  		REG_Z30,
  3416  		REG_Z31:
  3417  		if ctxt.Arch.Family == sys.I386 {
  3418  			goto bad
  3419  		}
  3420  		fallthrough
  3421  
  3422  	case REG_AX,
  3423  		REG_CX,
  3424  		REG_DX,
  3425  		REG_BX,
  3426  		REG_BP,
  3427  		REG_SI,
  3428  		REG_DI,
  3429  		REG_X0,
  3430  		REG_X1,
  3431  		REG_X2,
  3432  		REG_X3,
  3433  		REG_X4,
  3434  		REG_X5,
  3435  		REG_X6,
  3436  		REG_X7,
  3437  		REG_Y0,
  3438  		REG_Y1,
  3439  		REG_Y2,
  3440  		REG_Y3,
  3441  		REG_Y4,
  3442  		REG_Y5,
  3443  		REG_Y6,
  3444  		REG_Y7,
  3445  		REG_Z0,
  3446  		REG_Z1,
  3447  		REG_Z2,
  3448  		REG_Z3,
  3449  		REG_Z4,
  3450  		REG_Z5,
  3451  		REG_Z6,
  3452  		REG_Z7:
  3453  		i = reg[index] << 3
  3454  	}
  3455  
  3456  	switch scale {
  3457  	default:
  3458  		goto bad
  3459  
  3460  	case 1:
  3461  		break
  3462  
  3463  	case 2:
  3464  		i |= 1 << 6
  3465  
  3466  	case 4:
  3467  		i |= 2 << 6
  3468  
  3469  	case 8:
  3470  		i |= 3 << 6
  3471  	}
  3472  
  3473  bas:
  3474  	switch base {
  3475  	default:
  3476  		goto bad
  3477  
  3478  	case REG_NONE: // must be mod=00
  3479  		i |= 5
  3480  
  3481  	case REG_R8,
  3482  		REG_R9,
  3483  		REG_R10,
  3484  		REG_R11,
  3485  		REG_R12,
  3486  		REG_R13,
  3487  		REG_R14,
  3488  		REG_R15:
  3489  		if ctxt.Arch.Family == sys.I386 {
  3490  			goto bad
  3491  		}
  3492  		fallthrough
  3493  
  3494  	case REG_AX,
  3495  		REG_CX,
  3496  		REG_DX,
  3497  		REG_BX,
  3498  		REG_SP,
  3499  		REG_BP,
  3500  		REG_SI,
  3501  		REG_DI:
  3502  		i |= reg[base]
  3503  	}
  3504  
  3505  	ab.Put1(byte(i))
  3506  	return
  3507  
  3508  bad:
  3509  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  3510  	ab.Put1(0)
  3511  }
  3512  
  3513  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3514  	var rel obj.Reloc
  3515  
  3516  	v := vaddr(ctxt, p, a, &rel)
  3517  	if rel.Siz != 0 {
  3518  		if rel.Siz != 4 {
  3519  			ctxt.Diag("bad reloc")
  3520  		}
  3521  		r := obj.Addrel(cursym)
  3522  		*r = rel
  3523  		r.Off = int32(p.Pc + int64(ab.Len()))
  3524  	}
  3525  
  3526  	ab.PutInt32(int32(v))
  3527  }
  3528  
  3529  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3530  	if r != nil {
  3531  		*r = obj.Reloc{}
  3532  	}
  3533  
  3534  	switch a.Name {
  3535  	case obj.NAME_STATIC,
  3536  		obj.NAME_GOTREF,
  3537  		obj.NAME_EXTERN:
  3538  		s := a.Sym
  3539  		if r == nil {
  3540  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3541  			log.Fatalf("reloc")
  3542  		}
  3543  
  3544  		if a.Name == obj.NAME_GOTREF {
  3545  			r.Siz = 4
  3546  			r.Type = objabi.R_GOTPCREL
  3547  		} else if useAbs(ctxt, s) {
  3548  			r.Siz = 4
  3549  			r.Type = objabi.R_ADDR
  3550  		} else {
  3551  			r.Siz = 4
  3552  			r.Type = objabi.R_PCREL
  3553  		}
  3554  
  3555  		r.Off = -1 // caller must fill in
  3556  		r.Sym = s
  3557  		r.Add = a.Offset
  3558  
  3559  		return 0
  3560  	}
  3561  
  3562  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3563  		if r == nil {
  3564  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3565  			log.Fatalf("reloc")
  3566  		}
  3567  
  3568  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3569  			r.Type = objabi.R_TLS_LE
  3570  			r.Siz = 4
  3571  			r.Off = -1 // caller must fill in
  3572  			r.Add = a.Offset
  3573  		}
  3574  		return 0
  3575  	}
  3576  
  3577  	return a.Offset
  3578  }
  3579  
  3580  func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  3581  	var base int
  3582  	var rel obj.Reloc
  3583  
  3584  	rex &= 0x40 | Rxr
  3585  	if a.Offset != int64(int32(a.Offset)) {
  3586  		// The rules are slightly different for 386 and AMD64,
  3587  		// mostly for historical reasons. We may unify them later,
  3588  		// but it must be discussed beforehand.
  3589  		//
  3590  		// For 64bit mode only LEAL is allowed to overflow.
  3591  		// It's how https://golang.org/cl/59630 made it.
  3592  		// crypto/sha1/sha1block_amd64.s depends on this feature.
  3593  		//
  3594  		// For 32bit mode rules are more permissive.
  3595  		// If offset fits uint32, it's permitted.
  3596  		// This is allowed for assembly that wants to use 32-bit hex
  3597  		// constants, e.g. LEAL 0x99999999(AX), AX.
  3598  		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
  3599  			(ctxt.Arch.Family != sys.AMD64 &&
  3600  				int64(uint32(a.Offset)) == a.Offset &&
  3601  				ab.rexflag&Rxw == 0)
  3602  		if !overflowOK {
  3603  			ctxt.Diag("offset too large in %s", p)
  3604  		}
  3605  	}
  3606  	v := int32(a.Offset)
  3607  	rel.Siz = 0
  3608  
  3609  	switch a.Type {
  3610  	case obj.TYPE_ADDR:
  3611  		if a.Name == obj.NAME_NONE {
  3612  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  3613  		}
  3614  		if a.Index == REG_TLS {
  3615  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  3616  		}
  3617  		goto bad
  3618  
  3619  	case obj.TYPE_REG:
  3620  		const regFirst = REG_AL
  3621  		const regLast = REG_Z31
  3622  		if a.Reg < regFirst || regLast < a.Reg {
  3623  			goto bad
  3624  		}
  3625  		if v != 0 {
  3626  			goto bad
  3627  		}
  3628  		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  3629  		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  3630  		return
  3631  	}
  3632  
  3633  	if a.Type != obj.TYPE_MEM {
  3634  		goto bad
  3635  	}
  3636  
  3637  	if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
  3638  		base := int(a.Reg)
  3639  		switch a.Name {
  3640  		case obj.NAME_EXTERN,
  3641  			obj.NAME_GOTREF,
  3642  			obj.NAME_STATIC:
  3643  			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
  3644  				goto bad
  3645  			}
  3646  			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3647  				// The base register has already been set. It holds the PC
  3648  				// of this instruction returned by a PC-reading thunk.
  3649  				// See obj6.go:rewriteToPcrel.
  3650  			} else {
  3651  				base = REG_NONE
  3652  			}
  3653  			v = int32(vaddr(ctxt, p, a, &rel))
  3654  
  3655  		case obj.NAME_AUTO,
  3656  			obj.NAME_PARAM:
  3657  			base = REG_SP
  3658  		}
  3659  
  3660  		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  3661  		if base == REG_NONE {
  3662  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3663  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3664  			goto putrelv
  3665  		}
  3666  
  3667  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3668  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3669  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3670  			return
  3671  		}
  3672  
  3673  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3674  			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
  3675  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3676  			ab.Put1(disp8)
  3677  			return
  3678  		}
  3679  
  3680  		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
  3681  		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3682  		goto putrelv
  3683  	}
  3684  
  3685  	base = int(a.Reg)
  3686  	switch a.Name {
  3687  	case obj.NAME_STATIC,
  3688  		obj.NAME_GOTREF,
  3689  		obj.NAME_EXTERN:
  3690  		if a.Sym == nil {
  3691  			ctxt.Diag("bad addr: %v", p)
  3692  		}
  3693  		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3694  			// The base register has already been set. It holds the PC
  3695  			// of this instruction returned by a PC-reading thunk.
  3696  			// See obj6.go:rewriteToPcrel.
  3697  		} else {
  3698  			base = REG_NONE
  3699  		}
  3700  		v = int32(vaddr(ctxt, p, a, &rel))
  3701  
  3702  	case obj.NAME_AUTO,
  3703  		obj.NAME_PARAM:
  3704  		base = REG_SP
  3705  	}
  3706  
  3707  	if base == REG_TLS {
  3708  		v = int32(vaddr(ctxt, p, a, &rel))
  3709  	}
  3710  
  3711  	ab.rexflag |= regrex[base]&Rxb | rex
  3712  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  3713  		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
  3714  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  3715  				ctxt.Diag("%v has offset against gotref", p)
  3716  			}
  3717  			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
  3718  			goto putrelv
  3719  		}
  3720  
  3721  		// temporary
  3722  		ab.Put2(
  3723  			byte(0<<6|4<<0|r<<3), // sib present
  3724  			0<<6|4<<3|5<<0,       // DS:d32
  3725  		)
  3726  		goto putrelv
  3727  	}
  3728  
  3729  	if base == REG_SP || base == REG_R12 {
  3730  		if v == 0 {
  3731  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3732  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3733  			return
  3734  		}
  3735  
  3736  		if disp8, ok := toDisp8(v, p, ab); ok {
  3737  			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  3738  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3739  			ab.Put1(disp8)
  3740  			return
  3741  		}
  3742  
  3743  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3744  		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3745  		goto putrelv
  3746  	}
  3747  
  3748  	if REG_AX <= base && base <= REG_R15 {
  3749  		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
  3750  			ctxt.Headtype != objabi.Hwindows {
  3751  			rel = obj.Reloc{}
  3752  			rel.Type = objabi.R_TLS_LE
  3753  			rel.Siz = 4
  3754  			rel.Sym = nil
  3755  			rel.Add = int64(v)
  3756  			v = 0
  3757  		}
  3758  
  3759  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3760  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3761  			return
  3762  		}
  3763  
  3764  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3765  			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
  3766  			return
  3767  		}
  3768  
  3769  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3770  		goto putrelv
  3771  	}
  3772  
  3773  	goto bad
  3774  
  3775  putrelv:
  3776  	if rel.Siz != 0 {
  3777  		if rel.Siz != 4 {
  3778  			ctxt.Diag("bad rel")
  3779  			goto bad
  3780  		}
  3781  
  3782  		r := obj.Addrel(cursym)
  3783  		*r = rel
  3784  		r.Off = int32(p.Pc + int64(ab.Len()))
  3785  	}
  3786  
  3787  	ab.PutInt32(v)
  3788  	return
  3789  
  3790  bad:
  3791  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  3792  }
  3793  
  3794  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3795  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3796  }
  3797  
  3798  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3799  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3800  }
  3801  
  3802  func bytereg(a *obj.Addr, t *uint8) {
  3803  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3804  		a.Reg += REG_AL - REG_AX
  3805  		*t = 0
  3806  	}
  3807  }
  3808  
  3809  func unbytereg(a *obj.Addr, t *uint8) {
  3810  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3811  		a.Reg += REG_AX - REG_AL
  3812  		*t = 0
  3813  	}
  3814  }
  3815  
  3816  const (
  3817  	movLit uint8 = iota // Like Zlit
  3818  	movRegMem
  3819  	movMemReg
  3820  	movRegMem2op
  3821  	movMemReg2op
  3822  	movFullPtr // Load full pointer, trash heap (unsupported)
  3823  	movDoubleShift
  3824  	movTLSReg
  3825  )
  3826  
// ymovtab lists special-case encodings for PUSH/POP of segment registers,
// moves to and from segment/control/debug/test registers, descriptor-table
// and task-register loads and stores, double shifts, and TLS base loads.
//
// Each entry holds, in order: the instruction, three operand classes (Y*
// values, with Ynone where an operand is absent), one of the mov* codes
// declared above, and up to four bytes of opcode data.
// NOTE(review): the movtab struct is declared elsewhere; the field meanings
// here are inferred from the entries themselves — confirm against movtab.
//
// The table ends with an all-zero entry, presumably a terminator for the
// code that scans it.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3970  
  3971  func isax(a *obj.Addr) bool {
  3972  	switch a.Reg {
  3973  	case REG_AX, REG_AL, REG_AH:
  3974  		return true
  3975  	}
  3976  
  3977  	return a.Index == REG_AX
  3978  }
  3979  
  3980  func subreg(p *obj.Prog, from int, to int) {
  3981  	if false { /* debug['Q'] */
  3982  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3983  	}
  3984  
  3985  	if int(p.From.Reg) == from {
  3986  		p.From.Reg = int16(to)
  3987  		p.Ft = 0
  3988  	}
  3989  
  3990  	if int(p.To.Reg) == from {
  3991  		p.To.Reg = int16(to)
  3992  		p.Tt = 0
  3993  	}
  3994  
  3995  	if int(p.From.Index) == from {
  3996  		p.From.Index = int16(to)
  3997  		p.Ft = 0
  3998  	}
  3999  
  4000  	if int(p.To.Index) == from {
  4001  		p.To.Index = int16(to)
  4002  		p.Tt = 0
  4003  	}
  4004  
  4005  	if false { /* debug['Q'] */
  4006  		fmt.Printf("%v\n", p)
  4007  	}
  4008  }
  4009  
  4010  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  4011  	switch op {
  4012  	case Pm, Pe, Pf2, Pf3:
  4013  		if osize != 1 {
  4014  			if op != Pm {
  4015  				ab.Put1(byte(op))
  4016  			}
  4017  			ab.Put1(Pm)
  4018  			z++
  4019  			op = int(o.op[z])
  4020  			break
  4021  		}
  4022  		fallthrough
  4023  
  4024  	default:
  4025  		if ab.Len() == 0 || ab.Last() != Pm {
  4026  			ab.Put1(Pm)
  4027  		}
  4028  	}
  4029  
  4030  	ab.Put1(byte(op))
  4031  	return z
  4032  }
  4033  
  4034  var bpduff1 = []byte{
  4035  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  4036  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  4037  }
  4038  
  4039  var bpduff2 = []byte{
  4040  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  4041  }
  4042  
// asmevex emits the EVEX prefix and the opcode byte.
// Like asmvex it takes the r/m, vvvv and reg operands (rm, v, r), and in
// addition an optional K-masking register k. Any of the four may be nil.
//
// Expects ab.evex to be properly initialized.
// Constraint violations related to the instruction suffix (zeroing,
// rounding, broadcast, SAE) are reported via ctxt.Diag; the bytes are
// emitted regardless.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// The R, R', X and B bits default to 1 and are cleared when the
	// corresponding register carries the matching regrex extension flag.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// Without an index register, X is cleared from the RxrEvex flag of
		// rm.Reg; otherwise it is cleared from the Rxx flag of the index.
		// NOTE(review): presumably X serves as the 5th bit of the r/m
		// register selector when no index is present — confirm.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The vvvv field is stored with all four bits flipped.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	// Fields of P2. They start from defaults (L'L taken from the evex
	// descriptor) and are adjusted by the instruction suffix below.
	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		if k == nil {
			// When you request zeroing you must specify a mask register.
			// See issue 57952.
			ctxt.Diag("mask register must be specified for .Z instructions: %v", p)
		} else if k.Reg == REG_K0 {
			// The mask register must not be K0. That restriction is already
			// handled by the Yknot0 restriction in the opcode tables, so we
			// won't ever reach here. But put something sensible here just in case.
			ctxt.Diag("mask register must not be K0 for .Z instructions: %v", p)
		}
		evexZ = 1
	}
	// Rounding, broadcast and SAE are handled as mutually exclusive cases
	// (the first matching one wins); each of them sets the b bit.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		// The rounding mode replaces the L'L value.
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' is cleared when either the index register of rm or the v register
	// carries the RxrEvex flag; the index register is checked first.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		// The aaa field takes the mask register's number from the reg table.
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	// Emit the escape byte, the three payload bytes, then the opcode.
	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}
  4159  
  4160  // Emit VEX prefix and opcode byte.
  4161  // The three addresses are the r/m, vvvv, and reg fields.
  4162  // The reg and rm arguments appear in the same order as the
  4163  // arguments to asmand, which typically follows the call to asmvex.
  4164  // The final two arguments are the VEX prefix (see encoding above)
  4165  // and the opcode byte.
  4166  // For details about vex prefix see:
  4167  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  4168  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  4169  	ab.vexflag = true
  4170  	rexR := 0
  4171  	if r != nil {
  4172  		rexR = regrex[r.Reg] & Rxr
  4173  	}
  4174  	rexB := 0
  4175  	rexX := 0
  4176  	if rm != nil {
  4177  		rexB = regrex[rm.Reg] & Rxb
  4178  		rexX = regrex[rm.Index] & Rxx
  4179  	}
  4180  	vexM := (vex >> 3) & 0x7
  4181  	vexWLP := vex & 0x87
  4182  	vexV := byte(0)
  4183  	if v != nil {
  4184  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4185  	}
  4186  	vexV ^= 0xF
  4187  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  4188  		// Can use 2-byte encoding.
  4189  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  4190  	} else {
  4191  		// Must use 3-byte encoding.
  4192  		ab.Put3(0xc4,
  4193  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  4194  			vexV<<3|vexWLP,
  4195  		)
  4196  	}
  4197  	ab.Put1(opcode)
  4198  }
  4199  
  4200  // regIndex returns register index that fits in 5 bits.
  4201  //
  4202  //	R         : 3 bit | legacy instructions     | N/A
  4203  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  4204  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  4205  //
  4206  // Examples:
  4207  //
  4208  //	REG_Z30 => 30
  4209  //	REG_X15 => 15
  4210  //	REG_R9  => 9
  4211  //	REG_AX  => 0
  4212  func regIndex(r int16) int {
  4213  	lower3bits := reg[r]
  4214  	high4bit := regrex[r] & Rxr << 1
  4215  	high5bit := regrex[r] & RxrEvex << 0
  4216  	return lower3bits | high4bit | high5bit
  4217  }
  4218  
  4219  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  4220  // Reports errors via ctxt.
  4221  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4222  	// If any pair of the index, mask, or destination registers
  4223  	// are the same, illegal instruction trap (#UD) is triggered.
  4224  	index := regIndex(p.GetFrom3().Index)
  4225  	mask := regIndex(p.From.Reg)
  4226  	dest := regIndex(p.To.Reg)
  4227  	if dest == mask || dest == index || mask == index {
  4228  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  4229  		return false
  4230  	}
  4231  
  4232  	return true
  4233  }
  4234  
  4235  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  4236  // Reports errors via ctxt.
  4237  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4238  	// Illegal instruction trap (#UD) is triggered if the destination vector
  4239  	// register is the same as index vector in VSIB.
  4240  	index := regIndex(p.From.Index)
  4241  	dest := regIndex(p.To.Reg)
  4242  	if dest == index {
  4243  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  4244  		return false
  4245  	}
  4246  
  4247  	return true
  4248  }
  4249  
  4250  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4251  	o := opindex[p.As&obj.AMask]
  4252  
  4253  	if o == nil {
  4254  		ctxt.Diag("asmins: missing op %v", p)
  4255  		return
  4256  	}
  4257  
  4258  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  4259  		ab.Put1(byte(pre))
  4260  	}
  4261  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  4262  		ab.Put1(byte(pre))
  4263  	}
  4264  
  4265  	// Checks to warn about instruction/arguments combinations that
  4266  	// will unconditionally trigger illegal instruction trap (#UD).
  4267  	switch p.As {
  4268  	case AVGATHERDPD,
  4269  		AVGATHERQPD,
  4270  		AVGATHERDPS,
  4271  		AVGATHERQPS,
  4272  		AVPGATHERDD,
  4273  		AVPGATHERQD,
  4274  		AVPGATHERDQ,
  4275  		AVPGATHERQQ:
  4276  		if p.GetFrom3() == nil {
  4277  			// gathers need a 3rd arg. See issue 58822.
  4278  			ctxt.Diag("need a third arg for gather instruction: %v", p)
  4279  			return
  4280  		}
  4281  		// AVX512 gather requires explicit K mask.
  4282  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4283  			if !avx512gatherValid(ctxt, p) {
  4284  				return
  4285  			}
  4286  		} else {
  4287  			if !avx2gatherValid(ctxt, p) {
  4288  				return
  4289  			}
  4290  		}
  4291  	}
  4292  
  4293  	if p.Ft == 0 {
  4294  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4295  	}
  4296  	if p.Tt == 0 {
  4297  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4298  	}
  4299  
  4300  	ft := int(p.Ft) * Ymax
  4301  	var f3t int
  4302  	tt := int(p.Tt) * Ymax
  4303  
  4304  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4305  	z := 0
  4306  	var a *obj.Addr
  4307  	var l int
  4308  	var op int
  4309  	var q *obj.Prog
  4310  	var r *obj.Reloc
  4311  	var rel obj.Reloc
  4312  	var v int64
  4313  
  4314  	args := make([]int, 0, argListMax)
  4315  	if ft != Ynone*Ymax {
  4316  		args = append(args, ft)
  4317  	}
  4318  	for i := range p.RestArgs {
  4319  		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
  4320  	}
  4321  	if tt != Ynone*Ymax {
  4322  		args = append(args, tt)
  4323  	}
  4324  
  4325  	for _, yt := range o.ytab {
  4326  		// ytab matching is purely args-based,
  4327  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4328  		// add EVEX-only filter that will reject non-EVEX matches.
  4329  		//
  4330  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4331  		// Without this rule, operands will lead to VEX-encoded form
  4332  		// and produce "c5b15813" encoding.
  4333  		if !yt.match(args) {
  4334  			// "xo" is always zero for VEX/EVEX encoded insts.
  4335  			z += int(yt.zoffset) + xo
  4336  		} else {
  4337  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4338  				// Do not signal error and continue to search
  4339  				// for matching EVEX-encoded form.
  4340  				z += int(yt.zoffset)
  4341  				continue
  4342  			}
  4343  
  4344  			switch o.prefix {
  4345  			case Px1: // first option valid only in 32-bit mode
  4346  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4347  					z += int(yt.zoffset) + xo
  4348  					continue
  4349  				}
  4350  			case Pq: // 16 bit escape and opcode escape
  4351  				ab.Put2(Pe, Pm)
  4352  
  4353  			case Pq3: // 16 bit escape and opcode escape + REX.W
  4354  				ab.rexflag |= Pw
  4355  				ab.Put2(Pe, Pm)
  4356  
  4357  			case Pq4: // 66 0F 38
  4358  				ab.Put3(0x66, 0x0F, 0x38)
  4359  
  4360  			case Pq4w: // 66 0F 38 + REX.W
  4361  				ab.rexflag |= Pw
  4362  				ab.Put3(0x66, 0x0F, 0x38)
  4363  
  4364  			case Pq5: // F3 0F 38
  4365  				ab.Put3(0xF3, 0x0F, 0x38)
  4366  
  4367  			case Pq5w: //  F3 0F 38 + REX.W
  4368  				ab.rexflag |= Pw
  4369  				ab.Put3(0xF3, 0x0F, 0x38)
  4370  
  4371  			case Pf2, // xmm opcode escape
  4372  				Pf3:
  4373  				ab.Put2(o.prefix, Pm)
  4374  
  4375  			case Pef3:
  4376  				ab.Put3(Pe, Pf3, Pm)
  4377  
  4378  			case Pfw: // xmm opcode escape + REX.W
  4379  				ab.rexflag |= Pw
  4380  				ab.Put2(Pf3, Pm)
  4381  
  4382  			case Pm: // opcode escape
  4383  				ab.Put1(Pm)
  4384  
  4385  			case Pe: // 16 bit escape
  4386  				ab.Put1(Pe)
  4387  
  4388  			case Pw: // 64-bit escape
  4389  				if ctxt.Arch.Family != sys.AMD64 {
  4390  					ctxt.Diag("asmins: illegal 64: %v", p)
  4391  				}
  4392  				ab.rexflag |= Pw
  4393  
  4394  			case Pw8: // 64-bit escape if z >= 8
  4395  				if z >= 8 {
  4396  					if ctxt.Arch.Family != sys.AMD64 {
  4397  						ctxt.Diag("asmins: illegal 64: %v", p)
  4398  					}
  4399  					ab.rexflag |= Pw
  4400  				}
  4401  
  4402  			case Pb: // botch
  4403  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4404  					goto bad
  4405  				}
  4406  				// NOTE(rsc): This is probably safe to do always,
  4407  				// but when enabled it chooses different encodings
  4408  				// than the old cmd/internal/obj/i386 code did,
  4409  				// which breaks our "same bits out" checks.
  4410  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4411  				// in the original obj/i386, and it would encode
  4412  				// (using a valid, shorter form) as 3c 00 if we enabled
  4413  				// the call to bytereg here.
  4414  				if ctxt.Arch.Family == sys.AMD64 {
  4415  					bytereg(&p.From, &p.Ft)
  4416  					bytereg(&p.To, &p.Tt)
  4417  				}
  4418  
  4419  			case P32: // 32 bit but illegal if 64-bit mode
  4420  				if ctxt.Arch.Family == sys.AMD64 {
  4421  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4422  				}
  4423  
  4424  			case Py: // 64-bit only, no prefix
  4425  				if ctxt.Arch.Family != sys.AMD64 {
  4426  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4427  				}
  4428  
  4429  			case Py1: // 64-bit only if z < 1, no prefix
  4430  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4431  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4432  				}
  4433  
  4434  			case Py3: // 64-bit only if z < 3, no prefix
  4435  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4436  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4437  				}
  4438  			}
  4439  
  4440  			if z >= len(o.op) {
  4441  				log.Fatalf("asmins bad table %v", p)
  4442  			}
  4443  			op = int(o.op[z])
  4444  			if op == 0x0f {
  4445  				ab.Put1(byte(op))
  4446  				z++
  4447  				op = int(o.op[z])
  4448  			}
  4449  
  4450  			switch yt.zcase {
  4451  			default:
  4452  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4453  				return
  4454  
  4455  			case Zpseudo:
  4456  				break
  4457  
  4458  			case Zlit:
  4459  				ab.PutOpBytesLit(z, &o.op)
  4460  
  4461  			case Zlitr_m:
  4462  				ab.PutOpBytesLit(z, &o.op)
  4463  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4464  
  4465  			case Zlitm_r:
  4466  				ab.PutOpBytesLit(z, &o.op)
  4467  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4468  
  4469  			case Zlit_m_r:
  4470  				ab.PutOpBytesLit(z, &o.op)
  4471  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4472  
  4473  			case Zmb_r:
  4474  				bytereg(&p.From, &p.Ft)
  4475  				fallthrough
  4476  
  4477  			case Zm_r:
  4478  				ab.Put1(byte(op))
  4479  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4480  
  4481  			case Z_m_r:
  4482  				ab.Put1(byte(op))
  4483  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4484  
  4485  			case Zm2_r:
  4486  				ab.Put2(byte(op), o.op[z+1])
  4487  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4488  
  4489  			case Zm_r_xm:
  4490  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4491  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4492  
  4493  			case Zm_r_xm_nr:
  4494  				ab.rexflag = 0
  4495  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4496  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4497  
  4498  			case Zm_r_i_xm:
  4499  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4500  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4501  				ab.Put1(byte(p.To.Offset))
  4502  
  4503  			case Zibm_r, Zibr_m:
  4504  				ab.PutOpBytesLit(z, &o.op)
  4505  				if yt.zcase == Zibr_m {
  4506  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4507  				} else {
  4508  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4509  				}
  4510  				switch {
  4511  				default:
  4512  					ab.Put1(byte(p.From.Offset))
  4513  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4514  					ab.PutInt16(int16(p.From.Offset))
  4515  				case yt.args[0] == Yi32:
  4516  					ab.PutInt32(int32(p.From.Offset))
  4517  				}
  4518  
  4519  			case Zaut_r:
  4520  				ab.Put1(0x8d) // leal
  4521  				if p.From.Type != obj.TYPE_ADDR {
  4522  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4523  				}
  4524  				p.From.Type = obj.TYPE_MEM
  4525  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4526  				p.From.Type = obj.TYPE_ADDR
  4527  
  4528  			case Zm_o:
  4529  				ab.Put1(byte(op))
  4530  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4531  
  4532  			case Zr_m:
  4533  				ab.Put1(byte(op))
  4534  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4535  
  4536  			case Zvex:
  4537  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4538  
  4539  			case Zvex_rm_v_r:
  4540  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4541  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4542  
  4543  			case Zvex_rm_v_ro:
  4544  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4545  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4546  
  4547  			case Zvex_i_rm_vo:
  4548  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4549  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4550  				ab.Put1(byte(p.From.Offset))
  4551  
  4552  			case Zvex_i_r_v:
  4553  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4554  				regnum := byte(0x7)
  4555  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4556  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4557  				} else {
  4558  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4559  				}
  4560  				ab.Put1(o.op[z+2] | regnum)
  4561  				ab.Put1(byte(p.From.Offset))
  4562  
  4563  			case Zvex_i_rm_v_r:
  4564  				imm, from, from3, to := unpackOps4(p)
  4565  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4566  				ab.asmand(ctxt, cursym, p, from, to)
  4567  				ab.Put1(byte(imm.Offset))
  4568  
  4569  			case Zvex_i_rm_r:
  4570  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4571  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4572  				ab.Put1(byte(p.From.Offset))
  4573  
  4574  			case Zvex_v_rm_r:
  4575  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4576  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4577  
  4578  			case Zvex_r_v_rm:
  4579  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4580  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4581  
  4582  			case Zvex_rm_r_vo:
  4583  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4584  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4585  
  4586  			case Zvex_i_r_rm:
  4587  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4588  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4589  				ab.Put1(byte(p.From.Offset))
  4590  
  4591  			case Zvex_hr_rm_v_r:
  4592  				hr, from, from3, to := unpackOps4(p)
  4593  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4594  				ab.asmand(ctxt, cursym, p, from, to)
  4595  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4596  
  4597  			case Zevex_k_rmo:
  4598  				ab.evex = newEVEXBits(z, &o.op)
  4599  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4600  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4601  
  4602  			case Zevex_i_rm_vo:
  4603  				ab.evex = newEVEXBits(z, &o.op)
  4604  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4605  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4606  				ab.Put1(byte(p.From.Offset))
  4607  
  4608  			case Zevex_i_rm_k_vo:
  4609  				imm, from, kmask, to := unpackOps4(p)
  4610  				ab.evex = newEVEXBits(z, &o.op)
  4611  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4612  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4613  				ab.Put1(byte(imm.Offset))
  4614  
  4615  			case Zevex_i_r_rm:
  4616  				ab.evex = newEVEXBits(z, &o.op)
  4617  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4618  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4619  				ab.Put1(byte(p.From.Offset))
  4620  
  4621  			case Zevex_i_r_k_rm:
  4622  				imm, from, kmask, to := unpackOps4(p)
  4623  				ab.evex = newEVEXBits(z, &o.op)
  4624  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4625  				ab.asmand(ctxt, cursym, p, to, from)
  4626  				ab.Put1(byte(imm.Offset))
  4627  
  4628  			case Zevex_i_rm_r:
  4629  				ab.evex = newEVEXBits(z, &o.op)
  4630  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4631  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4632  				ab.Put1(byte(p.From.Offset))
  4633  
  4634  			case Zevex_i_rm_k_r:
  4635  				imm, from, kmask, to := unpackOps4(p)
  4636  				ab.evex = newEVEXBits(z, &o.op)
  4637  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4638  				ab.asmand(ctxt, cursym, p, from, to)
  4639  				ab.Put1(byte(imm.Offset))
  4640  
  4641  			case Zevex_i_rm_v_r:
  4642  				imm, from, from3, to := unpackOps4(p)
  4643  				ab.evex = newEVEXBits(z, &o.op)
  4644  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4645  				ab.asmand(ctxt, cursym, p, from, to)
  4646  				ab.Put1(byte(imm.Offset))
  4647  
  4648  			case Zevex_i_rm_v_k_r:
  4649  				imm, from, from3, kmask, to := unpackOps5(p)
  4650  				ab.evex = newEVEXBits(z, &o.op)
  4651  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4652  				ab.asmand(ctxt, cursym, p, from, to)
  4653  				ab.Put1(byte(imm.Offset))
  4654  
  4655  			case Zevex_r_v_rm:
  4656  				ab.evex = newEVEXBits(z, &o.op)
  4657  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4658  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4659  
  4660  			case Zevex_rm_v_r:
  4661  				ab.evex = newEVEXBits(z, &o.op)
  4662  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4663  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4664  
  4665  			case Zevex_rm_k_r:
  4666  				ab.evex = newEVEXBits(z, &o.op)
  4667  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4668  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4669  
  4670  			case Zevex_r_k_rm:
  4671  				ab.evex = newEVEXBits(z, &o.op)
  4672  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4673  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4674  
  4675  			case Zevex_rm_v_k_r:
  4676  				from, from3, kmask, to := unpackOps4(p)
  4677  				ab.evex = newEVEXBits(z, &o.op)
  4678  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4679  				ab.asmand(ctxt, cursym, p, from, to)
  4680  
  4681  			case Zevex_r_v_k_rm:
  4682  				from, from3, kmask, to := unpackOps4(p)
  4683  				ab.evex = newEVEXBits(z, &o.op)
  4684  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4685  				ab.asmand(ctxt, cursym, p, to, from)
  4686  
  4687  			case Zr_m_xm:
  4688  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4689  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4690  
  4691  			case Zr_m_xm_nr:
  4692  				ab.rexflag = 0
  4693  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4694  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4695  
  4696  			case Zo_m:
  4697  				ab.Put1(byte(op))
  4698  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4699  
  4700  			case Zcallindreg:
  4701  				r = obj.Addrel(cursym)
  4702  				r.Off = int32(p.Pc)
  4703  				r.Type = objabi.R_CALLIND
  4704  				r.Siz = 0
  4705  				fallthrough
  4706  
  4707  			case Zo_m64:
  4708  				ab.Put1(byte(op))
  4709  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4710  
  4711  			case Zm_ibo:
  4712  				ab.Put1(byte(op))
  4713  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4714  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4715  
  4716  			case Zibo_m:
  4717  				ab.Put1(byte(op))
  4718  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4719  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4720  
  4721  			case Zibo_m_xm:
  4722  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4723  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4724  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4725  
  4726  			case Z_ib, Zib_:
  4727  				if yt.zcase == Zib_ {
  4728  					a = &p.From
  4729  				} else {
  4730  					a = &p.To
  4731  				}
  4732  				ab.Put1(byte(op))
  4733  				if p.As == AXABORT {
  4734  					ab.Put1(o.op[z+1])
  4735  				}
  4736  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4737  
  4738  			case Zib_rp:
  4739  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4740  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4741  
  4742  			case Zil_rp:
  4743  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4744  				ab.Put1(byte(op + reg[p.To.Reg]))
  4745  				if o.prefix == Pe {
  4746  					v = vaddr(ctxt, p, &p.From, nil)
  4747  					ab.PutInt16(int16(v))
  4748  				} else {
  4749  					ab.relput4(ctxt, cursym, p, &p.From)
  4750  				}
  4751  
  4752  			case Zo_iw:
  4753  				ab.Put1(byte(op))
  4754  				if p.From.Type != obj.TYPE_NONE {
  4755  					v = vaddr(ctxt, p, &p.From, nil)
  4756  					ab.PutInt16(int16(v))
  4757  				}
  4758  
  4759  			case Ziq_rp:
  4760  				v = vaddr(ctxt, p, &p.From, &rel)
  4761  				l = int(v >> 32)
  4762  				if l == 0 && rel.Siz != 8 {
  4763  					ab.rexflag &^= (0x40 | Rxw)
  4764  
  4765  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4766  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4767  					if rel.Type != 0 {
  4768  						r = obj.Addrel(cursym)
  4769  						*r = rel
  4770  						r.Off = int32(p.Pc + int64(ab.Len()))
  4771  					}
  4772  
  4773  					ab.PutInt32(int32(v))
  4774  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4775  					ab.Put1(0xc7)
  4776  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4777  
  4778  					ab.PutInt32(int32(v)) // need all 8
  4779  				} else {
  4780  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4781  					ab.Put1(byte(op + reg[p.To.Reg]))
  4782  					if rel.Type != 0 {
  4783  						r = obj.Addrel(cursym)
  4784  						*r = rel
  4785  						r.Off = int32(p.Pc + int64(ab.Len()))
  4786  					}
  4787  
  4788  					ab.PutInt64(v)
  4789  				}
  4790  
  4791  			case Zib_rr:
  4792  				ab.Put1(byte(op))
  4793  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4794  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4795  
  4796  			case Z_il, Zil_:
  4797  				if yt.zcase == Zil_ {
  4798  					a = &p.From
  4799  				} else {
  4800  					a = &p.To
  4801  				}
  4802  				ab.Put1(byte(op))
  4803  				if o.prefix == Pe {
  4804  					v = vaddr(ctxt, p, a, nil)
  4805  					ab.PutInt16(int16(v))
  4806  				} else {
  4807  					ab.relput4(ctxt, cursym, p, a)
  4808  				}
  4809  
  4810  			case Zm_ilo, Zilo_m:
  4811  				ab.Put1(byte(op))
  4812  				if yt.zcase == Zilo_m {
  4813  					a = &p.From
  4814  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4815  				} else {
  4816  					a = &p.To
  4817  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4818  				}
  4819  
  4820  				if o.prefix == Pe {
  4821  					v = vaddr(ctxt, p, a, nil)
  4822  					ab.PutInt16(int16(v))
  4823  				} else {
  4824  					ab.relput4(ctxt, cursym, p, a)
  4825  				}
  4826  
  4827  			case Zil_rr:
  4828  				ab.Put1(byte(op))
  4829  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4830  				if o.prefix == Pe {
  4831  					v = vaddr(ctxt, p, &p.From, nil)
  4832  					ab.PutInt16(int16(v))
  4833  				} else {
  4834  					ab.relput4(ctxt, cursym, p, &p.From)
  4835  				}
  4836  
  4837  			case Z_rp:
  4838  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4839  				ab.Put1(byte(op + reg[p.To.Reg]))
  4840  
  4841  			case Zrp_:
  4842  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4843  				ab.Put1(byte(op + reg[p.From.Reg]))
  4844  
  4845  			case Zcallcon, Zjmpcon:
  4846  				if yt.zcase == Zcallcon {
  4847  					ab.Put1(byte(op))
  4848  				} else {
  4849  					ab.Put1(o.op[z+1])
  4850  				}
  4851  				r = obj.Addrel(cursym)
  4852  				r.Off = int32(p.Pc + int64(ab.Len()))
  4853  				r.Type = objabi.R_PCREL
  4854  				r.Siz = 4
  4855  				r.Add = p.To.Offset
  4856  				ab.PutInt32(0)
  4857  
  4858  			case Zcallind:
  4859  				ab.Put2(byte(op), o.op[z+1])
  4860  				r = obj.Addrel(cursym)
  4861  				r.Off = int32(p.Pc + int64(ab.Len()))
  4862  				if ctxt.Arch.Family == sys.AMD64 {
  4863  					r.Type = objabi.R_PCREL
  4864  				} else {
  4865  					r.Type = objabi.R_ADDR
  4866  				}
  4867  				r.Siz = 4
  4868  				r.Add = p.To.Offset
  4869  				r.Sym = p.To.Sym
  4870  				ab.PutInt32(0)
  4871  
  4872  			case Zcall, Zcallduff:
  4873  				if p.To.Sym == nil {
  4874  					ctxt.Diag("call without target")
  4875  					ctxt.DiagFlush()
  4876  					log.Fatalf("bad code")
  4877  				}
  4878  
  4879  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4880  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4881  				}
  4882  
  4883  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4884  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4885  					// (the call jumps into the middle of the function).
  4886  					// This makes it possible to see call sites for duffcopy/duffzero in
  4887  					// BP-based profiling tools like Linux perf (which is the
  4888  					// whole point of maintaining frame pointers in Go).
  4889  					// MOVQ BP, -16(SP)
  4890  					// LEAQ -16(SP), BP
  4891  					ab.Put(bpduff1)
  4892  				}
  4893  				ab.Put1(byte(op))
  4894  				r = obj.Addrel(cursym)
  4895  				r.Off = int32(p.Pc + int64(ab.Len()))
  4896  				r.Sym = p.To.Sym
  4897  				r.Add = p.To.Offset
  4898  				r.Type = objabi.R_CALL
  4899  				r.Siz = 4
  4900  				ab.PutInt32(0)
  4901  
  4902  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4903  					// Pop BP pushed above.
  4904  					// MOVQ 0(BP), BP
  4905  					ab.Put(bpduff2)
  4906  				}
  4907  
  4908  			// TODO: jump across functions needs reloc
  4909  			case Zbr, Zjmp, Zloop:
  4910  				if p.As == AXBEGIN {
  4911  					ab.Put1(byte(op))
  4912  				}
  4913  				if p.To.Sym != nil {
  4914  					if yt.zcase != Zjmp {
  4915  						ctxt.Diag("branch to ATEXT")
  4916  						ctxt.DiagFlush()
  4917  						log.Fatalf("bad code")
  4918  					}
  4919  
  4920  					ab.Put1(o.op[z+1])
  4921  					r = obj.Addrel(cursym)
  4922  					r.Off = int32(p.Pc + int64(ab.Len()))
  4923  					r.Sym = p.To.Sym
  4924  					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4925  					// it can point to a trampoline instead of the destination itself.
  4926  					r.Type = objabi.R_CALL
  4927  					r.Siz = 4
  4928  					ab.PutInt32(0)
  4929  					break
  4930  				}
  4931  
  4932  				// Assumes q is in this function.
  4933  				// TODO: Check in input, preserve in brchain.
  4934  
  4935  				// Fill in backward jump now.
  4936  				q = p.To.Target()
  4937  
  4938  				if q == nil {
  4939  					ctxt.Diag("jmp/branch/loop without target")
  4940  					ctxt.DiagFlush()
  4941  					log.Fatalf("bad code")
  4942  				}
  4943  
  4944  				if p.Back&branchBackwards != 0 {
  4945  					v = q.Pc - (p.Pc + 2)
  4946  					if v >= -128 && p.As != AXBEGIN {
  4947  						if p.As == AJCXZL {
  4948  							ab.Put1(0x67)
  4949  						}
  4950  						ab.Put2(byte(op), byte(v))
  4951  					} else if yt.zcase == Zloop {
  4952  						ctxt.Diag("loop too far: %v", p)
  4953  					} else {
  4954  						v -= 5 - 2
  4955  						if p.As == AXBEGIN {
  4956  							v--
  4957  						}
  4958  						if yt.zcase == Zbr {
  4959  							ab.Put1(0x0f)
  4960  							v--
  4961  						}
  4962  
  4963  						ab.Put1(o.op[z+1])
  4964  						ab.PutInt32(int32(v))
  4965  					}
  4966  
  4967  					break
  4968  				}
  4969  
  4970  				// Annotate target; will fill in later.
  4971  				p.Forwd = q.Rel
  4972  
  4973  				q.Rel = p
  4974  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4975  					if p.As == AJCXZL {
  4976  						ab.Put1(0x67)
  4977  					}
  4978  					ab.Put2(byte(op), 0)
  4979  				} else if yt.zcase == Zloop {
  4980  					ctxt.Diag("loop too far: %v", p)
  4981  				} else {
  4982  					if yt.zcase == Zbr {
  4983  						ab.Put1(0x0f)
  4984  					}
  4985  					ab.Put1(o.op[z+1])
  4986  					ab.PutInt32(0)
  4987  				}
  4988  
  4989  			case Zbyte:
  4990  				v = vaddr(ctxt, p, &p.From, &rel)
  4991  				if rel.Siz != 0 {
  4992  					rel.Siz = uint8(op)
  4993  					r = obj.Addrel(cursym)
  4994  					*r = rel
  4995  					r.Off = int32(p.Pc + int64(ab.Len()))
  4996  				}
  4997  
  4998  				ab.Put1(byte(v))
  4999  				if op > 1 {
  5000  					ab.Put1(byte(v >> 8))
  5001  					if op > 2 {
  5002  						ab.PutInt16(int16(v >> 16))
  5003  						if op > 4 {
  5004  							ab.PutInt32(int32(v >> 32))
  5005  						}
  5006  					}
  5007  				}
  5008  			}
  5009  
  5010  			return
  5011  		}
  5012  	}
  5013  	f3t = Ynone * Ymax
  5014  	if p.GetFrom3() != nil {
  5015  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  5016  	}
  5017  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  5018  		var pp obj.Prog
  5019  		var t []byte
  5020  		if p.As == mo[0].as {
  5021  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  5022  				t = mo[0].op[:]
  5023  				switch mo[0].code {
  5024  				default:
  5025  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  5026  
  5027  				case movLit:
  5028  					for z = 0; t[z] != 0; z++ {
  5029  						ab.Put1(t[z])
  5030  					}
  5031  
  5032  				case movRegMem:
  5033  					ab.Put1(t[0])
  5034  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  5035  
  5036  				case movMemReg:
  5037  					ab.Put1(t[0])
  5038  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  5039  
  5040  				case movRegMem2op: // r,m - 2op
  5041  					ab.Put2(t[0], t[1])
  5042  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  5043  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  5044  
  5045  				case movMemReg2op:
  5046  					ab.Put2(t[0], t[1])
  5047  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  5048  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  5049  
  5050  				case movFullPtr:
  5051  					if t[0] != 0 {
  5052  						ab.Put1(t[0])
  5053  					}
  5054  					switch p.To.Index {
  5055  					default:
  5056  						goto bad
  5057  
  5058  					case REG_DS:
  5059  						ab.Put1(0xc5)
  5060  
  5061  					case REG_SS:
  5062  						ab.Put2(0x0f, 0xb2)
  5063  
  5064  					case REG_ES:
  5065  						ab.Put1(0xc4)
  5066  
  5067  					case REG_FS:
  5068  						ab.Put2(0x0f, 0xb4)
  5069  
  5070  					case REG_GS:
  5071  						ab.Put2(0x0f, 0xb5)
  5072  					}
  5073  
  5074  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  5075  
  5076  				case movDoubleShift:
  5077  					if t[0] == Pw {
  5078  						if ctxt.Arch.Family != sys.AMD64 {
  5079  							ctxt.Diag("asmins: illegal 64: %v", p)
  5080  						}
  5081  						ab.rexflag |= Pw
  5082  						t = t[1:]
  5083  					} else if t[0] == Pe {
  5084  						ab.Put1(Pe)
  5085  						t = t[1:]
  5086  					}
  5087  
  5088  					switch p.From.Type {
  5089  					default:
  5090  						goto bad
  5091  
  5092  					case obj.TYPE_CONST:
  5093  						ab.Put2(0x0f, t[0])
  5094  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5095  						ab.Put1(byte(p.From.Offset))
  5096  
  5097  					case obj.TYPE_REG:
  5098  						switch p.From.Reg {
  5099  						default:
  5100  							goto bad
  5101  
  5102  						case REG_CL, REG_CX:
  5103  							ab.Put2(0x0f, t[1])
  5104  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5105  						}
  5106  					}
  5107  
  5108  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5109  				// where you load the TLS base register into a register and then index off that
  5110  				// register to access the actual TLS variables. Systems that allow direct TLS access
  5111  				// are handled in prefixof above and should not be listed here.
  5112  				case movTLSReg:
  5113  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  5114  						ctxt.Diag("invalid load of TLS: %v", p)
  5115  					}
  5116  
  5117  					if ctxt.Arch.Family == sys.I386 {
  5118  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5119  						// where you load the TLS base register into a register and then index off that
  5120  						// register to access the actual TLS variables. Systems that allow direct TLS access
  5121  						// are handled in prefixof above and should not be listed here.
  5122  						switch ctxt.Headtype {
  5123  						default:
  5124  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5125  
  5126  						case objabi.Hlinux, objabi.Hfreebsd:
  5127  							if ctxt.Flag_shared {
  5128  								// Note that this is not generating the same insns as the other cases.
  5129  								//     MOV TLS, dst
  5130  								// becomes
  5131  								//     call __x86.get_pc_thunk.dst
  5132  								//     movl (gotpc + g@gotntpoff)(dst), dst
  5133  								// which is encoded as
  5134  								//     call __x86.get_pc_thunk.dst
  5135  								//     movq 0(dst), dst
  5136  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  5137  								// is g, which we can't check here, but will when we assemble the second
  5138  								// instruction.
  5139  								dst := p.To.Reg
  5140  								ab.Put1(0xe8)
  5141  								r = obj.Addrel(cursym)
  5142  								r.Off = int32(p.Pc + int64(ab.Len()))
  5143  								r.Type = objabi.R_CALL
  5144  								r.Siz = 4
  5145  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  5146  								ab.PutInt32(0)
  5147  
  5148  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  5149  								r = obj.Addrel(cursym)
  5150  								r.Off = int32(p.Pc + int64(ab.Len()))
  5151  								r.Type = objabi.R_TLS_IE
  5152  								r.Siz = 4
  5153  								r.Add = 2
  5154  								ab.PutInt32(0)
  5155  							} else {
  5156  								// ELF TLS base is 0(GS).
  5157  								pp.From = p.From
  5158  
  5159  								pp.From.Type = obj.TYPE_MEM
  5160  								pp.From.Reg = REG_GS
  5161  								pp.From.Offset = 0
  5162  								pp.From.Index = REG_NONE
  5163  								pp.From.Scale = 0
  5164  								ab.Put2(0x65, // GS
  5165  									0x8B)
  5166  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5167  							}
  5168  						case objabi.Hplan9:
  5169  							pp.From = obj.Addr{}
  5170  							pp.From.Type = obj.TYPE_MEM
  5171  							pp.From.Name = obj.NAME_EXTERN
  5172  							pp.From.Sym = plan9privates
  5173  							pp.From.Offset = 0
  5174  							pp.From.Index = REG_NONE
  5175  							ab.Put1(0x8B)
  5176  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5177  						}
  5178  						break
  5179  					}
  5180  
  5181  					switch ctxt.Headtype {
  5182  					default:
  5183  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5184  
  5185  					case objabi.Hlinux, objabi.Hfreebsd:
  5186  						if !ctxt.Flag_shared {
  5187  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  5188  						}
  5189  						// Note that this is not generating the same insn as the other cases.
  5190  						//     MOV TLS, R_to
  5191  						// becomes
  5192  						//     movq g@gottpoff(%rip), R_to
  5193  						// which is encoded as
  5194  						//     movq 0(%rip), R_to
  5195  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  5196  						// is g, which we can't check here, but will when we assemble the second
  5197  						// instruction.
  5198  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  5199  
  5200  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  5201  						r = obj.Addrel(cursym)
  5202  						r.Off = int32(p.Pc + int64(ab.Len()))
  5203  						r.Type = objabi.R_TLS_IE
  5204  						r.Siz = 4
  5205  						r.Add = -4
  5206  						ab.PutInt32(0)
  5207  
  5208  					case objabi.Hplan9:
  5209  						pp.From = obj.Addr{}
  5210  						pp.From.Type = obj.TYPE_MEM
  5211  						pp.From.Name = obj.NAME_EXTERN
  5212  						pp.From.Sym = plan9privates
  5213  						pp.From.Offset = 0
  5214  						pp.From.Index = REG_NONE
  5215  						ab.rexflag |= Pw
  5216  						ab.Put1(0x8B)
  5217  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5218  
  5219  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  5220  						// TLS base is 0(FS).
  5221  						pp.From = p.From
  5222  
  5223  						pp.From.Type = obj.TYPE_MEM
  5224  						pp.From.Name = obj.NAME_NONE
  5225  						pp.From.Reg = REG_NONE
  5226  						pp.From.Offset = 0
  5227  						pp.From.Index = REG_NONE
  5228  						pp.From.Scale = 0
  5229  						ab.rexflag |= Pw
  5230  						ab.Put2(0x64, // FS
  5231  							0x8B)
  5232  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5233  					}
  5234  				}
  5235  				return
  5236  			}
  5237  		}
  5238  	}
  5239  	goto bad
  5240  
  5241  bad:
  5242  	if ctxt.Arch.Family != sys.AMD64 {
  5243  		// here, the assembly has failed.
  5244  		// if it's a byte instruction that has
  5245  		// unaddressable registers, try to
  5246  		// exchange registers and reissue the
  5247  		// instruction with the operands renamed.
  5248  		pp := *p
  5249  
  5250  		unbytereg(&pp.From, &pp.Ft)
  5251  		unbytereg(&pp.To, &pp.Tt)
  5252  
  5253  		z := int(p.From.Reg)
  5254  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5255  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5256  			// For now, different to keep bit-for-bit compatibility.
  5257  			if ctxt.Arch.Family == sys.I386 {
  5258  				breg := byteswapreg(ctxt, &p.To)
  5259  				if breg != REG_AX {
  5260  					ab.Put1(0x87) // xchg lhs,bx
  5261  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5262  					subreg(&pp, z, breg)
  5263  					ab.doasm(ctxt, cursym, &pp)
  5264  					ab.Put1(0x87) // xchg lhs,bx
  5265  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5266  				} else {
  5267  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5268  					subreg(&pp, z, REG_AX)
  5269  					ab.doasm(ctxt, cursym, &pp)
  5270  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5271  				}
  5272  				return
  5273  			}
  5274  
  5275  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5276  				// We certainly don't want to exchange
  5277  				// with AX if the op is MUL or DIV.
  5278  				ab.Put1(0x87) // xchg lhs,bx
  5279  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5280  				subreg(&pp, z, REG_BX)
  5281  				ab.doasm(ctxt, cursym, &pp)
  5282  				ab.Put1(0x87) // xchg lhs,bx
  5283  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5284  			} else {
  5285  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5286  				subreg(&pp, z, REG_AX)
  5287  				ab.doasm(ctxt, cursym, &pp)
  5288  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5289  			}
  5290  			return
  5291  		}
  5292  
  5293  		z = int(p.To.Reg)
  5294  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5295  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5296  			// For now, different to keep bit-for-bit compatibility.
  5297  			if ctxt.Arch.Family == sys.I386 {
  5298  				breg := byteswapreg(ctxt, &p.From)
  5299  				if breg != REG_AX {
  5300  					ab.Put1(0x87) //xchg rhs,bx
  5301  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5302  					subreg(&pp, z, breg)
  5303  					ab.doasm(ctxt, cursym, &pp)
  5304  					ab.Put1(0x87) // xchg rhs,bx
  5305  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5306  				} else {
  5307  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5308  					subreg(&pp, z, REG_AX)
  5309  					ab.doasm(ctxt, cursym, &pp)
  5310  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5311  				}
  5312  				return
  5313  			}
  5314  
  5315  			if isax(&p.From) {
  5316  				ab.Put1(0x87) // xchg rhs,bx
  5317  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5318  				subreg(&pp, z, REG_BX)
  5319  				ab.doasm(ctxt, cursym, &pp)
  5320  				ab.Put1(0x87) // xchg rhs,bx
  5321  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5322  			} else {
  5323  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5324  				subreg(&pp, z, REG_AX)
  5325  				ab.doasm(ctxt, cursym, &pp)
  5326  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5327  			}
  5328  			return
  5329  		}
  5330  	}
  5331  
  5332  	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
  5333  }
  5334  
  5335  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5336  // which is not referenced in a.
  5337  // If a is empty, it returns BX to account for MULB-like instructions
  5338  // that might use DX and AX.
  5339  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5340  	cana, canb, canc, cand := true, true, true, true
  5341  	if a.Type == obj.TYPE_NONE {
  5342  		cana, cand = false, false
  5343  	}
  5344  
  5345  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5346  		switch a.Reg {
  5347  		case REG_NONE:
  5348  			cana, cand = false, false
  5349  		case REG_AX, REG_AL, REG_AH:
  5350  			cana = false
  5351  		case REG_BX, REG_BL, REG_BH:
  5352  			canb = false
  5353  		case REG_CX, REG_CL, REG_CH:
  5354  			canc = false
  5355  		case REG_DX, REG_DL, REG_DH:
  5356  			cand = false
  5357  		}
  5358  	}
  5359  
  5360  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5361  		switch a.Index {
  5362  		case REG_AX:
  5363  			cana = false
  5364  		case REG_BX:
  5365  			canb = false
  5366  		case REG_CX:
  5367  			canc = false
  5368  		case REG_DX:
  5369  			cand = false
  5370  		}
  5371  	}
  5372  
  5373  	switch {
  5374  	case cana:
  5375  		return REG_AX
  5376  	case canb:
  5377  		return REG_BX
  5378  	case canc:
  5379  		return REG_CX
  5380  	case cand:
  5381  		return REG_DX
  5382  	default:
  5383  		ctxt.Diag("impossible byte register")
  5384  		ctxt.DiagFlush()
  5385  		log.Fatalf("bad code")
  5386  		return 0
  5387  	}
  5388  }
  5389  
  5390  func isbadbyte(a *obj.Addr) bool {
  5391  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5392  }
  5393  
  5394  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5395  	ab.Reset()
  5396  
  5397  	ab.rexflag = 0
  5398  	ab.vexflag = false
  5399  	ab.evexflag = false
  5400  	mark := ab.Len()
  5401  	ab.doasm(ctxt, cursym, p)
  5402  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5403  		// as befits the whole approach of the architecture,
  5404  		// the rex prefix must appear before the first opcode byte
  5405  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5406  		// before the 0f opcode escape!), or it might be ignored.
  5407  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5408  		if ctxt.Arch.Family != sys.AMD64 {
  5409  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5410  		}
  5411  		n := ab.Len()
  5412  		var np int
  5413  		for np = mark; np < n; np++ {
  5414  			c := ab.At(np)
  5415  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5416  				break
  5417  			}
  5418  		}
  5419  		ab.Insert(np, byte(0x40|ab.rexflag))
  5420  	}
  5421  
  5422  	n := ab.Len()
  5423  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5424  		r := &cursym.R[i]
  5425  		if int64(r.Off) < p.Pc {
  5426  			break
  5427  		}
  5428  		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5429  			r.Off++
  5430  		}
  5431  		if r.Type == objabi.R_PCREL {
  5432  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5433  				// PC-relative addressing is relative to the end of the instruction,
  5434  				// but the relocations applied by the linker are relative to the end
  5435  				// of the relocation. Because immediate instruction
  5436  				// arguments can follow the PC-relative memory reference in the
  5437  				// instruction encoding, the two may not coincide. In this case,
  5438  				// adjust addend so that linker can keep relocating relative to the
  5439  				// end of the relocation.
  5440  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5441  			} else if ctxt.Arch.Family == sys.I386 {
  5442  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5443  				// assumes that the previous instruction loaded the PC of the end
  5444  				// of that instruction into CX, so the adjustment is relative to
  5445  				// that.
  5446  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5447  			}
  5448  		}
  5449  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5450  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5451  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5452  		}
  5453  
  5454  	}
  5455  }
  5456  
  5457  // unpackOps4 extracts 4 operands from p.
  5458  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5459  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
  5460  }
  5461  
  5462  // unpackOps5 extracts 5 operands from p.
  5463  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5464  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
  5465  }
  5466  

View as plain text