...

Text file src/crypto/internal/bigmod/nat_ppc64x.s

Documentation: crypto/internal/bigmod

     1// Copyright 2013 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !purego && (ppc64 || ppc64le)
     6
     7#include "textflag.h"
     8
     9// func addMulVVW1024(z, x *uint, y uint) (c uint)
    10TEXT ·addMulVVW1024(SB), $0-32
      	// Frame spec $0-32: no local frame, 32 bytes of arguments plus result.
      	// That matches the four 8-byte slots z, x, y, c that addMulVVWx reads
      	// and writes at FP offsets 0, 8, 16 and 24.
      	// This entry point only selects the operand size:
      	// 4 iterations x 4 words per iteration x 64 bits = 1024 bits.
    11	MOVD	$4, R6 // R6 = z_len/4 = loop trip count (16 words, 4 per iteration)
    12	JMP		addMulVVWx<>(SB) // branch (not a call) into the shared body, which loads the args, stores c and executes RET
    13
    14// func addMulVVW1536(z, x *uint, y uint) (c uint)
    15TEXT ·addMulVVW1536(SB), $0-32
      	// Frame spec $0-32: no local frame, 32 bytes of arguments plus result.
      	// That matches the four 8-byte slots z, x, y, c that addMulVVWx reads
      	// and writes at FP offsets 0, 8, 16 and 24.
      	// This entry point only selects the operand size:
      	// 6 iterations x 4 words per iteration x 64 bits = 1536 bits.
    16	MOVD	$6, R6 // R6 = z_len/4 = loop trip count (24 words, 4 per iteration)
    17	JMP		addMulVVWx<>(SB) // branch (not a call) into the shared body, which loads the args, stores c and executes RET
    18
    19// func addMulVVW2048(z, x *uint, y uint) (c uint)
    20TEXT ·addMulVVW2048(SB), $0-32
      	// Frame spec $0-32: no local frame, 32 bytes of arguments plus result.
      	// That matches the four 8-byte slots z, x, y, c that addMulVVWx reads
      	// and writes at FP offsets 0, 8, 16 and 24.
      	// This entry point only selects the operand size:
      	// 8 iterations x 4 words per iteration x 64 bits = 2048 bits.
    21	MOVD	$8, R6 // R6 = z_len/4 = loop trip count (32 words, 4 per iteration)
    22	JMP		addMulVVWx<>(SB) // branch (not a call) into the shared body, which loads the args, stores c and executes RET
    23
    24// This local function expects to be called only by
    25// callers above. R6 contains the z length/4
    26// since 4 values are processed for each
    27// loop iteration, and is guaranteed to be > 0.
    28// If other callers are added this function might
    29// need to change.
    30TEXT addMulVVWx<>(SB), NOSPLIT, $0
      	// Shared multiply-accumulate body for the fixed-size entry points above.
      	// For every 64-bit word it computes the 128-bit value
      	//     x[i]*y + z[i] + c
      	// stores the low 64 bits back to z[i] and keeps the high 64 bits as the
      	// carry word c for the next word. The final carry word is the result.
      	//
      	// Inputs:
      	//   R6          loop trip count (words/4), set by the entry point before JMP
      	//   z, x, y     loaded from the argument area via FP offsets 0, 8, 16
      	// Output:
      	//   c           stored at FP offset 24
      	// Register roles:
      	//   R3 = &z[i], R4 = &x[i], R5 = y, R9 = running carry word,
      	//   CTR = iterations remaining, R10-R11 and R14-R21 = scratch.
      	//
      	// The sum cannot overflow 128 bits:
      	//   (2^64-1)^2 + 2*(2^64-1) = 2^128 - 1
      	// so the ADDZE instructions on the high word never wrap.
    31	MOVD	z+0(FP), R3
    32	MOVD	x+8(FP), R4
    33	MOVD	y+16(FP), R5
    34
    35	MOVD	$0, R9		// R9 = c = 0
    36	MOVD	R6, CTR		// Initialize loop counter
    37	PCALIGN	$16		// align the loop head below to a 16-byte boundary
    38
    39loop:
      	// One iteration handles 4 consecutive words (32 bytes) of x and z.
      	// All eight loads are issued before any arithmetic or store.
    40	MOVD	0(R4), R14	// x[i]
    41	MOVD	8(R4), R16	// x[i+1]
    42	MOVD	16(R4), R18	// x[i+2]
    43	MOVD	24(R4), R20	// x[i+3]
    44	MOVD	0(R3), R15	// z[i]
    45	MOVD	8(R3), R17	// z[i+1]
    46	MOVD	16(R3), R19	// z[i+2]
    47	MOVD	24(R3), R21	// z[i+3]
      	// Word i: result low in R10, high in R11.
      	// Each ADDC sets the CA bit, and the ADDZE right after it consumes that
      	// bit; the pairs must stay adjacent and in this order.
    48	MULLD	R5, R14, R10	// low x[i]*y
    49	MULHDU	R5, R14, R11	// high x[i]*y
    50	ADDC	R15, R10	// R10 = low + z[i], CA = carry out
    51	ADDZE	R11		// R11 = high + CA
    52	ADDC	R9, R10		// R10 += incoming carry word, CA = carry out
    53	ADDZE	R11, R9		// R9 = R11 + CA = carry word into word i+1
      	// Word i+1: R14/R15 are reused as low/high; x[i] and z[i] are dead by now.
    54	MULLD	R5, R16, R14	// low x[i+1]*y
    55	MULHDU	R5, R16, R15	// high x[i+1]*y
    56	ADDC	R17, R14	// R14 = low + z[i+1], CA = carry out
    57	ADDZE	R15		// R15 = high + CA
    58	ADDC	R9, R14		// R14 += incoming carry word, CA = carry out
    59	ADDZE	R15, R9		// R9 = R15 + CA = carry word into word i+2
      	// Word i+2: R16/R17 are reused as low/high; x[i+1] and z[i+1] are dead.
    60	MULLD	R5, R18, R16	// low x[i+2]*y
    61	MULHDU	R5, R18, R17	// high x[i+2]*y
    62	ADDC	R19, R16	// R16 = low + z[i+2], CA = carry out
    63	ADDZE	R17		// R17 = high + CA
    64	ADDC	R9, R16		// R16 += incoming carry word, CA = carry out
    65	ADDZE	R17, R9		// R9 = R17 + CA = carry word into word i+3
      	// Word i+3: R18/R19 are reused as low/high; x[i+2] and z[i+2] are dead.
    66	MULLD	R5, R20, R18	// low x[i+3]*y
    67	MULHDU	R5, R20, R19	// high x[i+3]*y
    68	ADDC	R21, R18	// R18 = low + z[i+3], CA = carry out
    69	ADDZE	R19		// R19 = high + CA
    70	ADDC	R9, R18		// R18 += incoming carry word, CA = carry out
    71	ADDZE	R19, R9		// R9 = R19 + CA = carry word into the next iteration
      	// Write back the four low words. The carry between iterations is held
      	// in R9, so nothing depends on the CA bit across the loop branch.
    72	MOVD	R10, 0(R3)	// z[i]
    73	MOVD	R14, 8(R3)	// z[i+1]
    74	MOVD	R16, 16(R3)	// z[i+2]
    75	MOVD	R18, 24(R3)	// z[i+3]
    76	ADD	$32, R3		// advance z pointer by 4 words
    77	ADD	$32, R4		// advance x pointer by 4 words
    78	BDNZ	loop		// decrement CTR, loop while it is non-zero
    79
      	// Reached by fall-through once CTR hits zero; no branch in this file
      	// section targets this label.
    80done:
    81	MOVD	R9, c+24(FP)	// return the final carry word
    82	RET

View as plain text