memmove_386.s

Documentation: runtime

     1// Inferno's libkern/memmove-386.s
     2// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-386.s
     3//
     4//         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
     5//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6//         Portions Copyright 2009 The Go Authors. All rights reserved.
     7//
     8// Permission is hereby granted, free of charge, to any person obtaining a copy
     9// of this software and associated documentation files (the "Software"), to deal
    10// in the Software without restriction, including without limitation the rights
    11// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12// copies of the Software, and to permit persons to whom the Software is
    13// furnished to do so, subject to the following conditions:
    14//
    15// The above copyright notice and this permission notice shall be included in
    16// all copies or substantial portions of the Software.
    17//
    18// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24// THE SOFTWARE.
    25
    26//go:build !plan9
    27
    28#include "go_asm.h"
    29#include "textflag.h"
    30
    31// See memmove Go doc for important implementation constraints.
    32
    33// func memmove(to, from unsafe.Pointer, n uintptr)
      //
      // Copies n bytes from `from` to `to`. The three arguments are read from
      // FP offsets 0, 4 and 8; the TEXT directive declares a 0-byte frame and
      // 12 bytes of arguments.
      //
      // Overlapping ranges are handled in two ways:
      //   - sizes up to 128 bytes (up to 16 under GO386_softfloat) load every
      //     byte into registers before storing any, so direction is irrelevant;
      //   - larger sizes compare the pointers and copy backward (STD + REP MOVSL)
      //     when the source starts at or below the destination and overlaps it,
      //     forward otherwise.
      //
      // Register roles inside this routine:
      //   DI      destination pointer (to)
      //   SI      source pointer (from)
      //   BX      number of bytes still to copy
      //   CX      repeat count for the REP string instructions; scratch elsewhere
      //   AX, DX, BP  scratch
      //   X0-X7   scratch in the 17..128 byte cases
      //
      // NOTE(review): the forward REP paths rely on the direction flag being
      // clear on entry; nothing here clears it before the first string
      // instruction (the backward path restores it with CLD).
    34TEXT runtime·memmove(SB), NOSPLIT, $0-12
    35	MOVL	to+0(FP), DI	// DI = to
    36	MOVL	from+4(FP), SI	// SI = from
    37	MOVL	n+8(FP), BX	// BX = n
    38
    39	// REP instructions have a high startup cost, so we handle small sizes
    40	// with some straightline code. The REP MOVSL instruction is really fast
    41	// for large sizes. The cutover is approximately 1K.  We implement up to
    42	// 128 because that is the maximum SSE register load (loading all data
    43	// into registers lets us ignore copy direction).
      	//
      	// tail is also re-entered from fwdBy4 and from the backward copy, with
      	// BX reduced to n&3 and SI/DI pointing at the bytes not yet copied.
    44tail:
    45	// BSR+branch table make almost all memmove/memclr benchmarks worse. Not worth doing.
    46	TESTL	BX, BX
    47	JEQ	move_0	// BX == 0: nothing to copy
    48	CMPL	BX, $2
    49	JBE	move_1or2	// BX is 1 or 2
    50	CMPL	BX, $4
    51	JB	move_3	// BX == 3 (0..2 were dispatched above)
    52	JE	move_4	// BX == 4, reusing the flags of the same compare
    53	CMPL	BX, $8
    54	JBE	move_5through8
    55	CMPL	BX, $16
    56	JBE	move_9through16
      	// When GO386_softfloat is defined, the X-register cases below are
      	// skipped: every size above 16 goes to the REP-based code at nosse2.
    57#ifdef GO386_softfloat
    58	JMP	nosse2
    59#endif
    60	CMPL	BX, $32
    61	JBE	move_17through32
    62	CMPL	BX, $64
    63	JBE	move_33through64
    64	CMPL	BX, $128
    65	JBE	move_65through128
    66
      	// Reached by fall-through when BX > 128, or by the JMP above when
      	// BX > 16 and GO386_softfloat is defined.
    67nosse2:
    68/*
    69 * check and set for backwards
    70 */
    71	CMPL	SI, DI
    72	JLS	back	// from <= to (unsigned): go check whether the ranges overlap
    73
    74/*
    75 * forward copy loop
    76 */
    77forward:
    78	// If REP MOVSB isn't fast, don't use it
    79	CMPB	internal∕cpu·X86+const_offsetX86HasERMS(SB), $1 // enhanced REP MOVSB/STOSB
    80	JNE	fwdBy4	// flag byte is not 1: copy 4 bytes at a time instead
    81
    82	// Check alignment
    83	MOVL	SI, AX
    84	ORL	DI, AX	// AX = from | to; low two bits are set if either pointer is unaligned
    85	TESTL	$3, AX
    86	JEQ	fwdBy4	// both pointers 4-byte aligned: copy 4 bytes at a time
    87
    88	// Do 1 byte at a time
      	// (only reached when the ERMS flag is 1 and a pointer is not 4-byte aligned)
    89	MOVL	BX, CX	// CX = byte count for REP MOVSB
    90	REP;	MOVSB
    91	RET
    92
    93fwdBy4:
    94	// Do 4 bytes at a time
    95	MOVL	BX, CX
    96	SHRL	$2, CX	// CX = number of whole 4-byte words
    97	ANDL	$3, BX	// BX = leftover byte count (0..3)
    98	REP;	MOVSL
    99	JMP	tail	// SI/DI now point past the copied words; tail copies the leftover bytes
   100
   101/*
   102 * check overlap
   103 */
   104back:
   105	MOVL	SI, CX
   106	ADDL	BX, CX	// CX = from + n, one past the last source byte
   107	CMPL	CX, DI
   108	JLS	forward	// from + n <= to: ranges do not overlap, a forward copy is safe
   109/*
   110 * whole thing backwards has
   111 * adjusted addresses
   112 */
   113
   114	ADDL	BX, DI	// DI = to + n
   115	ADDL	BX, SI	// SI = from + n
   116	STD	// set the direction flag: string instructions now step SI/DI downward
   117
   118/*
   119 * copy
   120 */
   121	MOVL	BX, CX
   122	SHRL	$2, CX	// CX = number of whole 4-byte words
   123	ANDL	$3, BX	// BX = leftover byte count (0..3) at the low end of the range
   124
   125	SUBL	$4, DI	// point DI at the last 4-byte word of the destination
   126	SUBL	$4, SI	// point SI at the last 4-byte word of the source
   127	REP;	MOVSL
   128
   129	CLD	// clear the direction flag again before leaving the backward path
   130	ADDL	$4, DI	// undo the -4 bias: DI = lowest destination address written so far
   131	ADDL	$4, SI	// likewise SI = lowest source address read so far
   132	SUBL	BX, DI	// step back over the leftover bytes: DI = original to
   133	SUBL	BX, SI	// SI = original from
   134	JMP	tail	// copy the 0..3 leftover bytes with the small-size code
   135
      	// Small-size cases. Each one performs all of its loads before its
      	// first store. Where two windows are used they are anchored at the
      	// start and at the end of the range (end-relative addressing via BX),
      	// so they overlap each other when BX is below the case's maximum.
      	//
      	// BX is 1 or 2: first byte and last byte (the same byte when BX == 1).
   136move_1or2:
   137	MOVB	(SI), AX
   138	MOVB	-1(SI)(BX*1), CX
   139	MOVB	AX, (DI)
   140	MOVB	CX, -1(DI)(BX*1)
   141	RET
   142move_0:
   143	RET
      	// BX == 3: one 2-byte move plus the byte at offset 2.
   144move_3:
   145	MOVW	(SI), AX
   146	MOVB	2(SI), CX
   147	MOVW	AX, (DI)
   148	MOVB	CX, 2(DI)
   149	RET
   150move_4:
   151	// We need a separate case for 4 to make sure we write pointers atomically.
   152	MOVL	(SI), AX
   153	MOVL	AX, (DI)
   154	RET
      	// BX in 5..8: first 4 bytes and last 4 bytes.
   155move_5through8:
   156	MOVL	(SI), AX
   157	MOVL	-4(SI)(BX*1), CX
   158	MOVL	AX, (DI)
   159	MOVL	CX, -4(DI)(BX*1)
   160	RET
      	// BX in 9..16: first 8 bytes and last 8 bytes, as four 4-byte words.
   161move_9through16:
   162	MOVL	(SI), AX
   163	MOVL	4(SI), CX
   164	MOVL	-8(SI)(BX*1), DX
   165	MOVL	-4(SI)(BX*1), BP
   166	MOVL	AX, (DI)
   167	MOVL	CX, 4(DI)
   168	MOVL	DX, -8(DI)(BX*1)
   169	MOVL	BP, -4(DI)(BX*1)
   170	RET
      	// BX in 17..32: first 16 bytes and last 16 bytes through X0/X1.
   171move_17through32:
   172	MOVOU	(SI), X0
   173	MOVOU	-16(SI)(BX*1), X1
   174	MOVOU	X0, (DI)
   175	MOVOU	X1, -16(DI)(BX*1)
   176	RET
      	// BX in 33..64: first 32 bytes and last 32 bytes through X0-X3.
   177move_33through64:
   178	MOVOU	(SI), X0
   179	MOVOU	16(SI), X1
   180	MOVOU	-32(SI)(BX*1), X2
   181	MOVOU	-16(SI)(BX*1), X3
   182	MOVOU	X0, (DI)
   183	MOVOU	X1, 16(DI)
   184	MOVOU	X2, -32(DI)(BX*1)
   185	MOVOU	X3, -16(DI)(BX*1)
   186	RET
      	// BX in 65..128: first 64 bytes and last 64 bytes through X0-X7.
   187move_65through128:
   188	MOVOU	(SI), X0
   189	MOVOU	16(SI), X1
   190	MOVOU	32(SI), X2
   191	MOVOU	48(SI), X3
   192	MOVOU	-64(SI)(BX*1), X4
   193	MOVOU	-48(SI)(BX*1), X5
   194	MOVOU	-32(SI)(BX*1), X6
   195	MOVOU	-16(SI)(BX*1), X7
   196	MOVOU	X0, (DI)
   197	MOVOU	X1, 16(DI)
   198	MOVOU	X2, 32(DI)
   199	MOVOU	X3, 48(DI)
   200	MOVOU	X4, -64(DI)(BX*1)
   201	MOVOU	X5, -48(DI)(BX*1)
   202	MOVOU	X6, -32(DI)(BX*1)
   203	MOVOU	X7, -16(DI)(BX*1)
   204	RET
View as plain text