/*
 *	mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p += b*m
 *
 *	b is a vector of n 32-bit digits and m is a single digit.
 *	the loop updates p[0..n-1]; the last high word is then added
 *	into p[n], so p must have room for n+1 digits.  any carry out
 *	of that final add is not propagated further.
 *
 *	when n is 0 the loop is skipped and only p[0] += 0 is done.
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi
 *		hi += carry
 *		p[i] += lo
 *		hi += carry
 *		oldhi = hi
 *
 *	the registers are:
 *		b = R1		- used as passed in, never loaded from the frame
 *		carry = R2
 *		n = R4
 *		m = R5
 *		p = R6
 *		hi = R8		- copied from HI after MULU
 *		lo = R9		- copied from LO after MULU
 *		oldhi = R10
 *		tmp = R11
 *
 *	b and p are advanced as pointers; there is no index register.
 */
TEXT	mpvecdigmuladd(SB),$0

	MOVW	n+4(FP),R4
	MOVW	m+8(FP),R5
	MOVW	p+12(FP),R6

	MOVW	R0, R10		/* oldhi = 0 */
	BEQ	R4, _muladd1	/* n == 0: nothing to multiply */
_muladdloop:
	MOVW	0(R1), R9	/* lo = b[i] */
	ADDU	$4, R1		/* b++ */
	MOVW	0(R6), R11	/* tmp = p[i] */
	MULU	R9, R5
	MOVW	HI, R8		/* hi = (b[i] * m)>>32 */
	MOVW	LO, R9		/* lo = b[i] * m */
	ADDU	R10, R9		/* lo += oldhi */
	SGTU	R10, R9, R2	/* carry out of lo += oldhi */
	ADDU	R2, R8		/* hi += carry */
	ADDU	R9, R11		/* tmp += lo */
	SGTU	R9, R11, R2	/* carry out of tmp += lo */
	ADDU	R2, R8		/* hi += carry */
	MOVW	R11, 0(R6)	/* p[i] = tmp */
	ADDU	$4, R6		/* p++ */
	MOVW	R8, R10		/* oldhi = hi */
	SUBU	$1, R4		/* n-- */
	BNE	R4, _muladdloop

_muladd1:
	MOVW	0(R6), R11	/* tmp = p[n] */
	ADDU	R10, R11	/* tmp += oldhi */
	MOVW	R11, 0(R6)	/* p[n] = tmp */

	RET