crypto/bn256/cloudflare: fix asm for dynamic linking (#24476)

When building with -buildmode=shared (dynamic linking), R15 is clobbered by a
global variable access, so the assembly now uses CX instead of R15.

Fixes: #24439
pull/24522/head
uji 3 years ago committed by GitHub
parent 0a4ec1dde5
commit ec64358ac9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
Changed files:
  1. crypto/bn256/cloudflare/gfp_amd64.s (14 lines changed)
  2. crypto/bn256/cloudflare/mul_amd64.h (6 lines changed)
  3. crypto/bn256/cloudflare/mul_bmi2_amd64.h (12 lines changed)

--- a/crypto/bn256/cloudflare/gfp_amd64.s
+++ b/crypto/bn256/cloudflare/gfp_amd64.s
@@ -49,7 +49,7 @@ TEXT ·gfpNeg(SB),0,$0-16
 SBBQ 24(DI), R11
 MOVQ $0, AX
-gfpCarry(R8,R9,R10,R11,AX, R12,R13,R14,R15,BX)
+gfpCarry(R8,R9,R10,R11,AX, R12,R13,R14,CX,BX)
 MOVQ c+0(FP), DI
 storeBlock(R8,R9,R10,R11, 0(DI))
@@ -68,7 +68,7 @@ TEXT ·gfpAdd(SB),0,$0-24
 ADCQ 24(SI), R11
 ADCQ $0, R12
-gfpCarry(R8,R9,R10,R11,R12, R13,R14,R15,AX,BX)
+gfpCarry(R8,R9,R10,R11,R12, R13,R14,CX,AX,BX)
 MOVQ c+0(FP), DI
 storeBlock(R8,R9,R10,R11, 0(DI))
@@ -83,7 +83,7 @@ TEXT ·gfpSub(SB),0,$0-24
 MOVQ ·p2+0(SB), R12
 MOVQ ·p2+8(SB), R13
 MOVQ ·p2+16(SB), R14
-MOVQ ·p2+24(SB), R15
+MOVQ ·p2+24(SB), CX
 MOVQ $0, AX
 SUBQ 0(SI), R8
@@ -94,12 +94,12 @@ TEXT ·gfpSub(SB),0,$0-24
 CMOVQCC AX, R12
 CMOVQCC AX, R13
 CMOVQCC AX, R14
-CMOVQCC AX, R15
+CMOVQCC AX, CX
 ADDQ R12, R8
 ADCQ R13, R9
 ADCQ R14, R10
-ADCQ R15, R11
+ADCQ CX, R11
 MOVQ c+0(FP), DI
 storeBlock(R8,R9,R10,R11, 0(DI))
@@ -115,7 +115,7 @@ TEXT ·gfpMul(SB),0,$160-24
 mulBMI2(0(DI),8(DI),16(DI),24(DI), 0(SI))
 storeBlock( R8, R9,R10,R11, 0(SP))
-storeBlock(R12,R13,R14,R15, 32(SP))
+storeBlock(R12,R13,R14,CX, 32(SP))
 gfpReduceBMI2()
 JMP end
@@ -125,5 +125,5 @@ nobmi2Mul:
 end:
 MOVQ c+0(FP), DI
-storeBlock(R12,R13,R14,R15, 0(DI))
+storeBlock(R12,R13,R14,CX, 0(DI))
 RET

--- a/crypto/bn256/cloudflare/mul_amd64.h
+++ b/crypto/bn256/cloudflare/mul_amd64.h
@@ -165,7 +165,7 @@
 \
 \ // Add the 512-bit intermediate to m*N
 loadBlock(96+stack, R8,R9,R10,R11) \
-loadBlock(128+stack, R12,R13,R14,R15) \
+loadBlock(128+stack, R12,R13,R14,CX) \
 \
 MOVQ $0, AX \
 ADDQ 0+stack, R8 \
@@ -175,7 +175,7 @@
 ADCQ 32+stack, R12 \
 ADCQ 40+stack, R13 \
 ADCQ 48+stack, R14 \
-ADCQ 56+stack, R15 \
+ADCQ 56+stack, CX \
 ADCQ $0, AX \
 \
-gfpCarry(R12,R13,R14,R15,AX, R8,R9,R10,R11,BX)
+gfpCarry(R12,R13,R14,CX,AX, R8,R9,R10,R11,BX)

--- a/crypto/bn256/cloudflare/mul_bmi2_amd64.h
+++ b/crypto/bn256/cloudflare/mul_bmi2_amd64.h
@@ -29,7 +29,7 @@
 ADCQ $0, R14 \
 \
 MOVQ a2, DX \
-MOVQ $0, R15 \
+MOVQ $0, CX \
 MULXQ 0+rb, AX, BX \
 ADDQ AX, R10 \
 ADCQ BX, R11 \
@@ -43,7 +43,7 @@
 MULXQ 24+rb, AX, BX \
 ADCQ AX, R13 \
 ADCQ BX, R14 \
-ADCQ $0, R15 \
+ADCQ $0, CX \
 \
 MOVQ a3, DX \
 MULXQ 0+rb, AX, BX \
@@ -52,13 +52,13 @@
 MULXQ 16+rb, AX, BX \
 ADCQ AX, R13 \
 ADCQ BX, R14 \
-ADCQ $0, R15 \
+ADCQ $0, CX \
 MULXQ 8+rb, AX, BX \
 ADDQ AX, R12 \
 ADCQ BX, R13 \
 MULXQ 24+rb, AX, BX \
 ADCQ AX, R14 \
-ADCQ BX, R15
+ADCQ BX, CX
 #define gfpReduceBMI2() \
 \ // m = (T * N') mod R, store m in R8:R9:R10:R11
@@ -106,7 +106,7 @@
 ADCQ 32(SP), R12 \
 ADCQ 40(SP), R13 \
 ADCQ 48(SP), R14 \
-ADCQ 56(SP), R15 \
+ADCQ 56(SP), CX \
 ADCQ $0, AX \
 \
-gfpCarry(R12,R13,R14,R15,AX, R8,R9,R10,R11,BX)
+gfpCarry(R12,R13,R14,CX,AX, R8,R9,R10,R11,BX)

Loading…
Cancel
Save