/*
 *	mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p -= b*m
 *
 *	b is a vector of n digits, m is a single digit.  n+1 digits of p
 *	are read and written: p[0..n-1] inside the loop and p[n] when the
 *	final high word is subtracted.
 *
 *	returns +1 if the final subtraction did not borrow (result is
 *	non-negative), -1 if it did.
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi
 *		hi += carry		(carry out of lo += oldhi)
 *		p[i] -= lo
 *		hi += borrow		(borrow out of p[i] -= lo)
 *		oldhi = hi
 *
 *	the registers are:
 *		b = R1			(first argument; advanced as the loop runs)
 *		carry/borrow = R2
 *		difference = R3
 *		n = R4			(counts down to zero)
 *		m = R5
 *		p = R6			(advanced as the loop runs)
 *		hi = R8			(copied from HI)
 *		lo = R9			(copied from LO)
 *		oldhi = R10
 *		tmp = R11
 *
 *	b and p are walked as pointers, so no separate index register
 *	is needed.
 */
TEXT	mpvecdigmulsub(SB),$0

	MOVW	n+4(FP),R4
	MOVW	m+8(FP),R5
	MOVW	p+12(FP),R6

	MOVW	R0, R10		/* oldhi = 0 */
	BEQ	R4, _mulsubtail	/* n == 0: nothing to multiply; the loop below is do-while and would wrap the count */
_mulsubloop:
	MOVW	0(R1), R9	/* lo = b[i] */
	ADDU	$4, R1
	MOVW	0(R6), R11	/* tmp = p[i] */
	MULU	R9, R5
	MOVW	HI, R8		/* hi = (b[i] * m)>>32 */
	MOVW	LO, R9		/* lo = b[i] * m */
	ADDU	R10, R9		/* lo += oldhi */
	SGTU	R10, R9, R2	/* carry = oldhi > lo, i.e. the add wrapped */
	ADDU	R2, R8		/* hi += carry */
	SUBU	R9, R11, R3	/* difference = tmp - lo */
	SGTU	R3, R11, R2	/* borrow = difference > tmp, i.e. the subtract wrapped */
	ADDU	R2, R8		/* hi += borrow */
	MOVW	R3, 0(R6)	/* p[i] = difference */
	ADDU	$4, R6
	MOVW	R8, R10		/* oldhi = hi */
	SUBU	$1, R4
	BNE	R4, _mulsubloop

_mulsubtail:
	MOVW	0(R6), R11	/* tmp = p[n] */
	SUBU	R10, R11, R3	/* difference = tmp - oldhi */
	MOVW	R3, 0(R6)	/* p[n] = difference */
	SGTU	R3, R11, R1	/* borrow out of the top digit */
	BNE	R1, _mulsub2
	MOVW	$1, R1		/* return +1 for positive result */
	RET

_mulsub2:
	MOVW	$-1, R1		/* return -1 for negative result */
	RET