/*
 * Branch-on-count shorthands (PowerPC BC with fixed BO,BI fields):
 *	BDNZ	decrement CTR, branch if CTR != 0
 *	BDNE	decrement CTR, branch if CTR != 0 and CR0[EQ] is clear
 * BDNE is not used below.
 */
#define	BDNZ	BC	16,0,
#define	BDNE	BC	0,2,

/*
 *	mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p += b*m
 *
 *	b holds n digits; p must have room for n+1 digits, since the
 *	final high word is added into p[n].  A carry out of p[n] is
 *	discarded.  If n <= 0 nothing is read or written.
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi;	hi += carry
 *		p[i] += lo;	hi += carry
 *		oldhi = hi
 *
 *	the registers are:
 *		b = R3		- first argument, already in R3 on entry
 *		n = R4		- copied to CTR, which counts the loop
 *		m = R5
 *		p = R6
 *		hi = R8
 *		lo = R9
 *		oldhi = R10
 *		tmp = R11
 *
 *	R0 is relied on to hold zero throughout.
 */
TEXT	mpvecdigmuladd(SB),$0

	MOVW	n+4(FP),R4
	CMP	R4, $0
	BLE	_muladddone	/* BDNZ tests at the bottom: n == 0 would wrap CTR and run ~2^32 times */
	MOVW	m+8(FP),R5
	MOVW	p+12(FP),R6
	SUB	$4, R3		/* pre-decrement: MOVWU bumps the pointer before each access */
	SUB	$4, R6		/* pre-decrement: MOVWU bumps the pointer before each access */

	MOVW	R0, R10		/* oldhi = 0 */
	MOVW	R0, XER		/* XER = 0, so the carry bit starts clear */
	MOVW	R4, CTR		/* loop count = n */
_muladdloop:
	MOVWU	4(R3),R9	/* lo = b[i], advancing b */
	MOVW	4(R6),R11	/* tmp = p[i]; p is advanced by the store below */
	MULHWU	R9,R5,R8	/* hi = (b[i] * m)>>32 */
	MULLW	R9,R5,R9	/* lo = b[i] * m */
	ADDC	R10,R9		/* lo += oldhi, setting carry */
	ADDE	R0,R8		/* hi += carry */
	ADDC	R9,R11		/* tmp += lo, setting carry */
	ADDE	R0,R8		/* hi += carry */
	MOVWU	R11,4(R6)	/* p[i] = tmp, advancing p */
	MOVW	R8,R10		/* oldhi = hi */
	BDNZ	_muladdloop

	MOVW	4(R6),R11	/* tmp = p[n] */
	ADDC	R10,R11		/* tmp += oldhi; carry out is dropped */
	MOVWU	R11,4(R6)	/* p[n] = tmp */

_muladddone:
	RETURN