/*
 *	mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p += b*m
 *
 *	Multiplies the n-digit vector b by the single digit m and adds the
 *	product into p.  The loop updates p[0..n-1]; the final high word is
 *	then added into p[n], so n+1 words of p are read and written.
 *	A carry out of that last addition is not propagated any further.
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi		(carry out goes into hi)
 *		p[i] += lo		(carry out goes into hi)
 *		oldhi = hi
 *
 *	the registers are:
 *		b = R1		- used as passed in, never loaded from the frame;
 *				  advanced by 4 each iteration
 *		carry = R2	- 0/1 result of the unsigned compares
 *		n = R4		- counts down to zero
 *		m = R5
 *		p = R6		- advanced by 4 each iteration
 *		hi = R8		- copy of the HI multiply result register
 *		lo = R9		- copy of the LO multiply result register
 *		oldhi = R10
 *		tmp = R11
 *
 *	No separate index register is used; b and p are walked as pointers.
 */
TEXT	mpvecdigmuladd(SB),$0

	MOVW	n+4(FP),R4
	MOVW	m+8(FP),R5
	MOVW	p+12(FP),R6

	MOVW	R0, R10		/* oldhi = 0 */

	/*
	 * Skip the loop when there are no digits to process.  The test has
	 * to be on n (R4): the loop decrements R4 and exits only when it
	 * reaches zero, so entering it with n == 0 would wrap the counter.
	 */
	BEQ	R4, _muladd1
_muladdloop:
	MOVW	0(R1), R9	/* lo = b[i] */
	ADDU	$4, R1		/* b++ */
	MOVW	0(R6), R11	/* tmp = p[i] */
	MULU	R9, R5		/* HI,LO = b[i] * m */
	MOVW	HI, R8		/* hi = (b[i] * m)>>32 */
	MOVW	LO, R9		/* lo = b[i] * m */
	ADDU	R10, R9		/* lo += oldhi */
	SGTU	R10, R9, R2	/* carry = oldhi > lo, i.e. the add wrapped */
	ADDU	R2, R8		/* hi += carry */
	ADDU	R9, R11		/* tmp += lo */
	SGTU	R9, R11, R2	/* carry = lo > tmp, i.e. the add wrapped */
	ADDU	R2, R8		/* hi += carry */
	MOVW	R11, 0(R6)	/* p[i] = tmp */
	ADDU	$4, R6		/* p++ */
	MOVW	R8, R10		/* oldhi = hi */
	SUBU	$1, R4		/* n-- */
	BNE	R4, _muladdloop

_muladd1:
	/* fold the last high word into the digit just above the product */
	MOVW	0(R6), R11	/* tmp = p[i] */
	ADDU	R10, R11	/* tmp += oldhi */
	MOVW	R11, 0(R6)	/* p[i] = tmp */

	RET