Official Go implementation of the Ethereum protocol
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
go-ethereum/crypto/bn256/cloudflare/gfp_amd64.s

129 lines
2.2 KiB

// +build amd64,!generic
#define storeBlock(a0,a1,a2,a3, r) \
MOVQ a0, 0+r \
MOVQ a1, 8+r \
MOVQ a2, 16+r \
MOVQ a3, 24+r
#define loadBlock(r, a0,a1,a2,a3) \
MOVQ 0+r, a0 \
MOVQ 8+r, a1 \
MOVQ 16+r, a2 \
MOVQ 24+r, a3
#define gfpCarry(a0,a1,a2,a3,a4, b0,b1,b2,b3,b4) \
\ // b = a-p
MOVQ a0, b0 \
MOVQ a1, b1 \
MOVQ a2, b2 \
MOVQ a3, b3 \
MOVQ a4, b4 \
\
SUBQ ·p2+0(SB), b0 \
SBBQ ·p2+8(SB), b1 \
SBBQ ·p2+16(SB), b2 \
SBBQ ·p2+24(SB), b3 \
SBBQ $0, b4 \
\
\ // if b is negative then return a
\ // else return b
CMOVQCC b0, a0 \
CMOVQCC b1, a1 \
CMOVQCC b2, a2 \
CMOVQCC b3, a3
#include "mul_amd64.h"
#include "mul_bmi2_amd64.h"
TEXT ·gfpNeg(SB),0,$0-16
MOVQ ·p2+0(SB), R8
MOVQ ·p2+8(SB), R9
MOVQ ·p2+16(SB), R10
MOVQ ·p2+24(SB), R11
MOVQ a+8(FP), DI
SUBQ 0(DI), R8
SBBQ 8(DI), R9
SBBQ 16(DI), R10
SBBQ 24(DI), R11
MOVQ $0, AX
gfpCarry(R8,R9,R10,R11,AX, R12,R13,R14,CX,BX)
MOVQ c+0(FP), DI
storeBlock(R8,R9,R10,R11, 0(DI))
RET
TEXT ·gfpAdd(SB),0,$0-24
MOVQ a+8(FP), DI
MOVQ b+16(FP), SI
loadBlock(0(DI), R8,R9,R10,R11)
MOVQ $0, R12
ADDQ 0(SI), R8
ADCQ 8(SI), R9
ADCQ 16(SI), R10
ADCQ 24(SI), R11
ADCQ $0, R12
gfpCarry(R8,R9,R10,R11,R12, R13,R14,CX,AX,BX)
MOVQ c+0(FP), DI
storeBlock(R8,R9,R10,R11, 0(DI))
RET
TEXT ·gfpSub(SB),0,$0-24
MOVQ a+8(FP), DI
MOVQ b+16(FP), SI
loadBlock(0(DI), R8,R9,R10,R11)
MOVQ ·p2+0(SB), R12
MOVQ ·p2+8(SB), R13
MOVQ ·p2+16(SB), R14
MOVQ ·p2+24(SB), CX
MOVQ $0, AX
SUBQ 0(SI), R8
SBBQ 8(SI), R9
SBBQ 16(SI), R10
SBBQ 24(SI), R11
CMOVQCC AX, R12
CMOVQCC AX, R13
CMOVQCC AX, R14
CMOVQCC AX, CX
ADDQ R12, R8
ADCQ R13, R9
ADCQ R14, R10
ADCQ CX, R11
MOVQ c+0(FP), DI
storeBlock(R8,R9,R10,R11, 0(DI))
RET
TEXT ·gfpMul(SB),0,$160-24
MOVQ a+8(FP), DI
MOVQ b+16(FP), SI
// Jump to a slightly different implementation if MULX isn't supported.
CMPB ·hasBMI2(SB), $0
JE nobmi2Mul
mulBMI2(0(DI),8(DI),16(DI),24(DI), 0(SI))
storeBlock( R8, R9,R10,R11, 0(SP))
storeBlock(R12,R13,R14,CX, 32(SP))
gfpReduceBMI2()
JMP end
nobmi2Mul:
mul(0(DI),8(DI),16(DI),24(DI), 0(SI), 0(SP))
gfpReduce(0(SP))
end:
MOVQ c+0(FP), DI
storeBlock(R12,R13,R14,CX, 0(DI))
RET