/*
 *	mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p -= b*m
 *
 *	b holds n digits and is multiplied by the single digit m;
 *	the product is subtracted from p in place.  p[0..n] is read
 *	and written (n+1 digits): the last one absorbs the final
 *	high word.
 *
 *	returns +1 in R1 if the final subtraction did not borrow,
 *	-1 if it did (the result went negative).
 *
 *	if n <= 0 the multiply loop is skipped entirely; p[0] is
 *	rewritten unchanged and +1 is returned.
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi
 *		hi += carry out of that add
 *		tmp = p[i] - lo
 *		hi += borrow out of that subtract
 *		p[i] = tmp
 *		oldhi = hi
 *
 *	the registers are:
 *		b = R1		- also holds the return value
 *		carry/borrow = R2
 *		new p[i] = R3
 *		n = R4		- counts down to 0
 *		m = R5
 *		p = R6
 *		hi = R8		- copied from HI after MULU
 *		lo = R9		- copied from LO after MULU
 *		oldhi = R10
 *		tmp = R11	- p[i] before the subtract
 *
 */
TEXT	mpvecdigmulsub(SB),$0

	MOVW	n+4(FP),R4
	MOVW	m+8(FP),R5
	MOVW	p+12(FP),R6

	MOVW	R0, R10		/* oldhi = 0 */

	/*
	 * the loop below is bottom-tested; without this guard n == 0
	 * would wrap the counter and walk off the end of b and p.
	 */
	SGT	$1, R4, R2	/* R2 = (1 > n), signed */
	BNE	R2, _mulsubtail	/* n <= 0: nothing to multiply */

_mulsubloop:
	MOVW	0(R1), R9	/* lo = b[i] */
	ADDU	$4, R1		/* b++ */
	MOVW	0(R6), R11	/* tmp = p[i] */
	MULU	R9, R5
	MOVW	HI, R8		/* hi = (b[i] * m)>>32 */
	MOVW	LO, R9		/* lo = b[i] * m */
	ADDU	R10, R9		/* lo += oldhi */
	SGTU	R10, R9, R2	/* carry = oldhi > lo, i.e. the add wrapped */
	ADDU	R2, R8		/* hi += carry */
	SUBU	R9, R11, R3	/* R3 = tmp - lo */
	SGTU	R3, R11, R2	/* borrow = R3 > tmp, i.e. the subtract wrapped */
	ADDU	R2, R8		/* hi += borrow */
	MOVW	R3, 0(R6)	/* p[i] = R3 */
	ADDU	$4, R6		/* p++ */
	MOVW	R8, R10		/* oldhi = hi */
	SUBU	$1, R4		/* n-- */
	BNE	R4, _mulsubloop

_mulsubtail:
	MOVW	0(R6), R11	/* tmp = p[i] */
	SUBU	R10, R11, R3	/* R3 = tmp - oldhi */
	MOVW	R3, 0(R6)	/* p[i] = R3 */
	SGTU	R3, R11, R1	/* R1 = 1 if the subtract borrowed */
	BNE	R1, _mulsub2
	MOVW	$1, R1		/* return +1 for positive result */
	RET

_mulsub2:
	MOVW	$-1, R1		/* return -1 for negative result */
	RET