!-----------------------------------------------------------------------
! bn_mul_mont_fpu -- Montgomery multiplication for UltraSPARC using the
! FPU as a 16x32-bit multiplier (SPARC V9 + VIS, 64-bit stack bias 2047).
!
! Presumed C prototype (TODO confirm against the caller):
!   int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap,
!                       const BN_ULONG *bp, const BN_ULONG *np,
!                       const BN_ULONG *n0, int num);
!
! In (after `save`):
!   %i0  rp   result vector (written by .Lsub/.Lcopy)
!   %i1  ap   multiplicand; read as pairs of 32-bit words ("a[j]")
!   %i2  bp   multiplier; read as 4x16-bit pieces ("b[i]")
!   %i3  np   modulus; read as pairs of 32-bit words ("n[j]")
!   %i4  n0   pointer to two 32-bit words, combined into 64-bit %g4;
!             %i4 itself is later reused as the top-most carry
!   %i5  num  number of 32-bit words; must be >= 4 and even
! Out:
!   %i0 = 0  if num < 4 or num is odd ("unsupported input value")
!   %i0 = 1  otherwise, with the reduced product stored at rp
!
! Internal register roles:
!   %i5       num converted to a byte count: (num/2)*8
!   %l0       tp, running pointer into the 64-bit temporary vector
!   %l1..%l4  ends of ap_l, ap_h, np_l, np_h (a[]/n[] halves converted
!             to double format), indexed with negative offsets
!   %l5, %l6  i and j, negative byte indices counting up to zero
!   %l7       0xffff mask for 16-bit limbs
!   %g1       carry between consecutive 64-bit result words
!   %f0-%f6   b[i] as four 16-bit limbs (low to high), as doubles
!   %f8-%f14  m = (ap[0]*b[i]+t[0])*n0 as four 16-bit limbs, as doubles
!   %f24,%f26 partial sums carried over into the next j iteration
!
! Scratch at %sp+2047+192:
!   +0..+31   four 64-bit partial sums (also used to stage m)
!   +32..+47  the two trailing partial sums of each pass
!   +48       caller's %asi
!   +64       start of tp, followed by ap_l, ap_h, np_l, np_h
!             (5 vectors of num*8 bytes, see alloca below)
!
! Notes:
!   * `bcs,a %xcc,.+8` followed by `add x,1,x` is a conditional
!     increment: the annulled delay slot executes only if carry is set.
!   * The instruction order interleaves FPU and integer work and fills
!     branch delay slots; do not reorder without re-checking both.
!   * %asi is temporarily set to 210 (0xD2) for the 16-bit `ldda`
!     loads and restored before a successful return.
!-----------------------------------------------------------------------
.section	".text",#alloc,#execinstr

.global bn_mul_mont_fpu
.align  32
bn_mul_mont_fpu:
	save	%sp,-192-64,%sp

	cmp	%i5,4
	bl,a,pn %icc,.Lret
	clr	%i0			! (annulled slot) num<4: return 0
	andcc	%i5,1,%g0		! %i5 has to be even...
	bnz,a,pn %icc,.Lret
	clr	%i0			! signal "unsupported input value"

	srl	%i5,1,%i5		! num/=2: count 64-bit words
	sethi	%hi(0xffff),%l7
	ld	[%i4+0],%g4		! %g4 reassigned, remember?
	or	%l7,%lo(0xffff),%l7	! %l7=0xffff, 16-bit limb mask
	ld	[%i4+4],%o0
	sllx	%o0,32,%o0
	or	%o0,%g4,%g4		! %g4=n0[1].n0[0]

	sll	%i5,3,%i5		! num*=8

	add	%sp,2047,%o0		! real top of stack
	sll	%i5,2,%o1
	add	%o1,%i5,%o1		! %o1=num*5
	sub	%o0,%o1,%o0
	and	%o0,-2048,%o0		! optimize TLB utilization
	sub	%o0,2047,%sp		! alloca(5*num*8)

	rd	%asi,%o7		! save %asi
	add	%sp,2047+192+64,%l0	! %l0=tp
	add	%l0,%i5,%l1
	add	%l1,%i5,%l1		! [an]p_[lh] point at the vectors' ends !
	add	%l1,%i5,%l2
	add	%l2,%i5,%l3
	add	%l3,%i5,%l4

	wr	%g0,210,%asi		! setup %asi for 16-bit FP loads

	add	%i0,%i5,%i0		! readjust input pointers to point
	add	%i1,%i5,%i1		! at the ends too...
	add	%i2,%i5,%i2
	add	%i3,%i5,%i3

	stx	%o7,[%sp+2047+192+48]	! save %asi

	sub	%g0,%i5,%l5		! i=-num
	sub	%g0,%i5,%l6		! j=-num

	add	%i1,%l6,%o3		! %o3=&ap[0]
	add	%i2,%l5,%o4		! %o4=&bp[0]

	ld	[%o3+4],%g1		! ap[0], upper 32 bits
	ld	[%o3+0],%o0		! ap[0], lower 32 bits
	ld	[%o4+4],%g5		! bp[0], upper 32 bits
	sllx	%g1,32,%g1
	ld	[%o4+0],%o1		! bp[0], lower 32 bits
	sllx	%g5,32,%g5
	or	%g1,%o0,%o0
	or	%g5,%o1,%o1

	add	%i3,%l6,%o5		! %o5=&np[0]

	mulx	%o1,%o0,%o0		! ap[0]*bp[0]
	mulx	%g4,%o0,%o0		! ap[0]*bp[0]*n0
	stx	%o0,[%sp+2047+192+0]

	ld	[%o3+0],%f17	! load a[j] as pair of 32-bit words
	.word	0xa1b00c20	! fzeros %f16
	ld	[%o3+4],%f19
	.word	0xa5b00c20	! fzeros %f18
	ld	[%o5+0],%f21	! load n[j] as pair of 32-bit words
	.word	0xa9b00c20	! fzeros %f20
	ld	[%o5+4],%f23
	.word	0xadb00c20	! fzeros %f22

	! transfer b[i] to FPU as 4x16-bit values
	ldda	[%o4+2]%asi,%f0
	fxtod	%f16,%f16
	ldda	[%o4+0]%asi,%f2
	fxtod	%f18,%f18
	ldda	[%o4+6]%asi,%f4
	fxtod	%f20,%f20
	ldda	[%o4+4]%asi,%f6
	fxtod	%f22,%f22

	! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values
	ldda	[%sp+2047+192+6]%asi,%f8
	fxtod	%f0,%f0
	ldda	[%sp+2047+192+4]%asi,%f10
	fxtod	%f2,%f2
	ldda	[%sp+2047+192+2]%asi,%f12
	fxtod	%f4,%f4
	ldda	[%sp+2047+192+0]%asi,%f14
	fxtod	%f6,%f6

	std	%f16,[%l1+%l6]		! save smashed ap[j] in double format
	fxtod	%f8,%f8
	std	%f18,[%l2+%l6]
	fxtod	%f10,%f10
	std	%f20,[%l3+%l6]		! save smashed np[j] in double format
	fxtod	%f12,%f12
	std	%f22,[%l4+%l6]
	fxtod	%f14,%f14

		fmuld	%f16,%f0,%f32
		fmuld	%f20,%f8,%f48
		fmuld	%f16,%f2,%f34
		fmuld	%f20,%f10,%f50
		fmuld	%f16,%f4,%f36
	faddd	%f32,%f48,%f48
		fmuld	%f20,%f12,%f52
		fmuld	%f16,%f6,%f38
	faddd	%f34,%f50,%f50
		fmuld	%f20,%f14,%f54
		fmuld	%f18,%f0,%f40
	faddd	%f36,%f52,%f52
		fmuld	%f22,%f8,%f56
		fmuld	%f18,%f2,%f42
	faddd	%f38,%f54,%f54
		fmuld	%f22,%f10,%f58
		fmuld	%f18,%f4,%f44
	faddd	%f40,%f56,%f56
		fmuld	%f22,%f12,%f60
		fmuld	%f18,%f6,%f46
	faddd	%f42,%f58,%f58
		fmuld	%f22,%f14,%f62

	faddd	%f44,%f60,%f24	! %f60
	faddd	%f46,%f62,%f26	! %f62

	faddd	%f52,%f56,%f52
	faddd	%f54,%f58,%f54

	fdtox	%f48,%f48
	fdtox	%f50,%f50
	fdtox	%f52,%f52
	fdtox	%f54,%f54

	std	%f48,[%sp+2047+192+0]
	add	%l6,8,%l6
	std	%f50,[%sp+2047+192+8]
	add	%i1,%l6,%o4
	std	%f52,[%sp+2047+192+16]
	add	%i3,%l6,%o5
	std	%f54,[%sp+2047+192+24]

	ld	[%o4+0],%f17	! load a[j] as pair of 32-bit words
	.word	0xa1b00c20	! fzeros %f16
	ld	[%o4+4],%f19
	.word	0xa5b00c20	! fzeros %f18
	ld	[%o5+0],%f21	! load n[j] as pair of 32-bit words
	.word	0xa9b00c20	! fzeros %f20
	ld	[%o5+4],%f23
	.word	0xadb00c20	! fzeros %f22

	fxtod	%f16,%f16
	fxtod	%f18,%f18
	fxtod	%f20,%f20
	fxtod	%f22,%f22

	ldx	[%sp+2047+192+0],%o0
		fmuld	%f16,%f0,%f32
	ldx	[%sp+2047+192+8],%o1
		fmuld	%f20,%f8,%f48
	ldx	[%sp+2047+192+16],%o2
		fmuld	%f16,%f2,%f34
	ldx	[%sp+2047+192+24],%o3
		fmuld	%f20,%f10,%f50

	srlx	%o0,16,%o7
	std	%f16,[%l1+%l6]		! save smashed ap[j] in double format
		fmuld	%f16,%f4,%f36
	add	%o7,%o1,%o1
	std	%f18,[%l2+%l6]
		faddd	%f32,%f48,%f48
		fmuld	%f20,%f12,%f52
	srlx	%o1,16,%o7
	std	%f20,[%l3+%l6]		! save smashed np[j] in double format
		fmuld	%f16,%f6,%f38
	add	%o7,%o2,%o2
	std	%f22,[%l4+%l6]
		faddd	%f34,%f50,%f50
		fmuld	%f20,%f14,%f54
	srlx	%o2,16,%o7
		fmuld	%f18,%f0,%f40
	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
		faddd	%f36,%f52,%f52
		fmuld	%f22,%f8,%f56
	! The low 64-bit word of the first column is not assembled or
	! stored here; only its carry (%g1) is kept.
	!and	%o0,%l7,%o0
	!and	%o1,%l7,%o1
	!and	%o2,%l7,%o2
	!sllx	%o1,16,%o1
	!sllx	%o2,32,%o2
	!sllx	%o3,48,%o7
	!or	%o1,%o0,%o0
	!or	%o2,%o0,%o0
	!or	%o7,%o0,%o0		! 64-bit result
	srlx	%o3,16,%g1		! 34-bit carry
		fmuld	%f18,%f2,%f42

	faddd	%f38,%f54,%f54
		fmuld	%f22,%f10,%f58
		fmuld	%f18,%f4,%f44
	faddd	%f40,%f56,%f56
		fmuld	%f22,%f12,%f60
		fmuld	%f18,%f6,%f46
	faddd	%f42,%f58,%f58
		fmuld	%f22,%f14,%f62

	faddd	%f24,%f48,%f48
	faddd	%f26,%f50,%f50
	faddd	%f44,%f60,%f24	! %f60
	faddd	%f46,%f62,%f26	! %f62

	faddd	%f52,%f56,%f52
	faddd	%f54,%f58,%f54

	fdtox	%f48,%f48
	fdtox	%f50,%f50
	fdtox	%f52,%f52
	fdtox	%f54,%f54

	std	%f48,[%sp+2047+192+0]
	std	%f50,[%sp+2047+192+8]
	addcc	%l6,8,%l6
	std	%f52,[%sp+2047+192+16]
	bz,pn	%icc,.L1stskip
	std	%f54,[%sp+2047+192+24]	! (delay slot)

.align	32			! incidentally already aligned !
.L1st:
	add	%i1,%l6,%o4
	add	%i3,%l6,%o5
	ld	[%o4+0],%f17	! load a[j] as pair of 32-bit words
	.word	0xa1b00c20	! fzeros %f16
	ld	[%o4+4],%f19
	.word	0xa5b00c20	! fzeros %f18
	ld	[%o5+0],%f21	! load n[j] as pair of 32-bit words
	.word	0xa9b00c20	! fzeros %f20
	ld	[%o5+4],%f23
	.word	0xadb00c20	! fzeros %f22

	fxtod	%f16,%f16
	fxtod	%f18,%f18
	fxtod	%f20,%f20
	fxtod	%f22,%f22

	ldx	[%sp+2047+192+0],%o0
		fmuld	%f16,%f0,%f32
	ldx	[%sp+2047+192+8],%o1
		fmuld	%f20,%f8,%f48
	ldx	[%sp+2047+192+16],%o2
		fmuld	%f16,%f2,%f34
	ldx	[%sp+2047+192+24],%o3
		fmuld	%f20,%f10,%f50

	srlx	%o0,16,%o7
	std	%f16,[%l1+%l6]		! save smashed ap[j] in double format
		fmuld	%f16,%f4,%f36
	add	%o7,%o1,%o1
	std	%f18,[%l2+%l6]
		faddd	%f32,%f48,%f48
		fmuld	%f20,%f12,%f52
	srlx	%o1,16,%o7
	std	%f20,[%l3+%l6]		! save smashed np[j] in double format
		fmuld	%f16,%f6,%f38
	add	%o7,%o2,%o2
	std	%f22,[%l4+%l6]
		faddd	%f34,%f50,%f50
		fmuld	%f20,%f14,%f54
	srlx	%o2,16,%o7
		fmuld	%f18,%f0,%f40
	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
	and	%o0,%l7,%o0
		faddd	%f36,%f52,%f52
		fmuld	%f22,%f8,%f56
	and	%o1,%l7,%o1
	and	%o2,%l7,%o2
		fmuld	%f18,%f2,%f42
	sllx	%o1,16,%o1
		faddd	%f38,%f54,%f54
		fmuld	%f22,%f10,%f58
	sllx	%o2,32,%o2
		fmuld	%f18,%f4,%f44
	sllx	%o3,48,%o7
	or	%o1,%o0,%o0
		faddd	%f40,%f56,%f56
		fmuld	%f22,%f12,%f60
	or	%o2,%o0,%o0
		fmuld	%f18,%f6,%f46
	or	%o7,%o0,%o0		! 64-bit result
		faddd	%f42,%f58,%f58
		fmuld	%f22,%f14,%f62
	addcc	%g1,%o0,%o0		! add carry from previous word
		faddd	%f24,%f48,%f48
	srlx	%o3,16,%g1		! 34-bit carry
		faddd	%f26,%f50,%f50
	bcs,a	%xcc,.+8
	add	%g1,1,%g1		! (annulled slot) carry++ if addcc overflowed

	stx	%o0,[%l0]		! tp[j-1]=

	faddd	%f44,%f60,%f24	! %f60
	faddd	%f46,%f62,%f26	! %f62

	faddd	%f52,%f56,%f52
	faddd	%f54,%f58,%f54

	fdtox	%f48,%f48
	fdtox	%f50,%f50
	fdtox	%f52,%f52
	fdtox	%f54,%f54

	std	%f48,[%sp+2047+192+0]
	std	%f50,[%sp+2047+192+8]
	std	%f52,[%sp+2047+192+16]
	std	%f54,[%sp+2047+192+24]

	addcc	%l6,8,%l6
	bnz,pt	%icc,.L1st
	add	%l0,8,%l0		! (delay slot) tp++

.L1stskip:
	fdtox	%f24,%f24
	fdtox	%f26,%f26

	ldx	[%sp+2047+192+0],%o0
	ldx	[%sp+2047+192+8],%o1
	ldx	[%sp+2047+192+16],%o2
	ldx	[%sp+2047+192+24],%o3

	srlx	%o0,16,%o7
	std	%f24,[%sp+2047+192+32]
	add	%o7,%o1,%o1
	std	%f26,[%sp+2047+192+40]
	srlx	%o1,16,%o7
	add	%o7,%o2,%o2
	srlx	%o2,16,%o7
	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
	and	%o0,%l7,%o0
	and	%o1,%l7,%o1
	and	%o2,%l7,%o2
	sllx	%o1,16,%o1
	sllx	%o2,32,%o2
	sllx	%o3,48,%o7
	or	%o1,%o0,%o0
	or	%o2,%o0,%o0
	or	%o7,%o0,%o0		! 64-bit result
	ldx	[%sp+2047+192+32],%o4
	addcc	%g1,%o0,%o0
	ldx	[%sp+2047+192+40],%o5
	srlx	%o3,16,%g1		! 34-bit carry
	bcs,a	%xcc,.+8
	add	%g1,1,%g1		! (annulled slot) carry++ if addcc overflowed

	stx	%o0,[%l0]		! tp[j-1]=
	add	%l0,8,%l0

	srlx	%o4,16,%o7
	add	%o7,%o5,%o5
	and	%o4,%l7,%o4
	sllx	%o5,16,%o7
	or	%o7,%o4,%o4
	addcc	%g1,%o4,%o4
	srlx	%o5,48,%g1
	bcs,a	%xcc,.+8
	add	%g1,1,%g1		! (annulled slot) carry++ if addcc overflowed

	mov	%g1,%i4			! %i4=top-most carry (n0 pointer no longer needed)
	stx	%o4,[%l0]		! tp[num-1]=

	ba	.Louter
	add	%l5,8,%l5		! (delay slot) i++
.align	32
.Louter:
	sub	%g0,%i5,%l6		! j=-num
	add	%sp,2047+192+64,%l0	! rewind tp

	add	%i1,%l6,%o3		! %o3=&ap[0]
	add	%i2,%l5,%o4		! %o4=&bp[i]

	ld	[%o3+4],%g1		! ap[0], upper 32 bits
	ld	[%o3+0],%o0		! ap[0], lower 32 bits
	ld	[%o4+4],%g5		! bp[i], upper 32 bits
	sllx	%g1,32,%g1
	ld	[%o4+0],%o1		! bp[i], lower 32 bits
	sllx	%g5,32,%g5
	or	%g1,%o0,%o0
	or	%g5,%o1,%o1

	ldx	[%l0],%o2		! tp[0]
	mulx	%o1,%o0,%o0
	addcc	%o2,%o0,%o0
	mulx	%g4,%o0,%o0		! (ap[0]*bp[i]+t[0])*n0
	stx	%o0,[%sp+2047+192+0]

	! transfer b[i] to FPU as 4x16-bit values
	ldda	[%o4+2]%asi,%f0
	ldda	[%o4+0]%asi,%f2
	ldda	[%o4+6]%asi,%f4
	ldda	[%o4+4]%asi,%f6

	! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values
	ldda	[%sp+2047+192+6]%asi,%f8
	fxtod	%f0,%f0
	ldda	[%sp+2047+192+4]%asi,%f10
	fxtod	%f2,%f2
	ldda	[%sp+2047+192+2]%asi,%f12
	fxtod	%f4,%f4
	ldda	[%sp+2047+192+0]%asi,%f14
	fxtod	%f6,%f6
	ldd	[%l1+%l6],%f16		! load a[j] in double format
	fxtod	%f8,%f8
	ldd	[%l2+%l6],%f18
	fxtod	%f10,%f10
	ldd	[%l3+%l6],%f20		! load n[j] in double format
	fxtod	%f12,%f12
	ldd	[%l4+%l6],%f22
	fxtod	%f14,%f14

		fmuld	%f16,%f0,%f32
		fmuld	%f20,%f8,%f48
		fmuld	%f16,%f2,%f34
		fmuld	%f20,%f10,%f50
		fmuld	%f16,%f4,%f36
	faddd	%f32,%f48,%f48
		fmuld	%f20,%f12,%f52
		fmuld	%f16,%f6,%f38
	faddd	%f34,%f50,%f50
		fmuld	%f20,%f14,%f54
		fmuld	%f18,%f0,%f40
	faddd	%f36,%f52,%f52
		fmuld	%f22,%f8,%f56
		fmuld	%f18,%f2,%f42
	faddd	%f38,%f54,%f54
		fmuld	%f22,%f10,%f58
		fmuld	%f18,%f4,%f44
	faddd	%f40,%f56,%f56
		fmuld	%f22,%f12,%f60
		fmuld	%f18,%f6,%f46
	faddd	%f42,%f58,%f58
		fmuld	%f22,%f14,%f62

	faddd	%f44,%f60,%f24	! %f60
	faddd	%f46,%f62,%f26	! %f62

	faddd	%f52,%f56,%f52
	faddd	%f54,%f58,%f54

	fdtox	%f48,%f48
	fdtox	%f50,%f50
	fdtox	%f52,%f52
	fdtox	%f54,%f54

	std	%f48,[%sp+2047+192+0]
	std	%f50,[%sp+2047+192+8]
	std	%f52,[%sp+2047+192+16]
	add	%l6,8,%l6
	std	%f54,[%sp+2047+192+24]

	ldd	[%l1+%l6],%f16		! load a[j] in double format
	ldd	[%l2+%l6],%f18
	ldd	[%l3+%l6],%f20		! load n[j] in double format
	ldd	[%l4+%l6],%f22

		fmuld	%f16,%f0,%f32
		fmuld	%f20,%f8,%f48
		fmuld	%f16,%f2,%f34
		fmuld	%f20,%f10,%f50
		fmuld	%f16,%f4,%f36
	ldx	[%sp+2047+192+0],%o0
		faddd	%f32,%f48,%f48
		fmuld	%f20,%f12,%f52
	ldx	[%sp+2047+192+8],%o1
		fmuld	%f16,%f6,%f38
	ldx	[%sp+2047+192+16],%o2
		faddd	%f34,%f50,%f50
		fmuld	%f20,%f14,%f54
	ldx	[%sp+2047+192+24],%o3
		fmuld	%f18,%f0,%f40

	srlx	%o0,16,%o7
		faddd	%f36,%f52,%f52
		fmuld	%f22,%f8,%f56
	add	%o7,%o1,%o1
		fmuld	%f18,%f2,%f42
	srlx	%o1,16,%o7
		faddd	%f38,%f54,%f54
		fmuld	%f22,%f10,%f58
	add	%o7,%o2,%o2
		fmuld	%f18,%f4,%f44
	srlx	%o2,16,%o7
		faddd	%f40,%f56,%f56
		fmuld	%f22,%f12,%f60
	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
	! why?
	and	%o0,%l7,%o0
		fmuld	%f18,%f6,%f46
	and	%o1,%l7,%o1
	and	%o2,%l7,%o2
		faddd	%f42,%f58,%f58
		fmuld	%f22,%f14,%f62
	sllx	%o1,16,%o1
		faddd	%f24,%f48,%f48
	sllx	%o2,32,%o2
		faddd	%f26,%f50,%f50
	sllx	%o3,48,%o7
	or	%o1,%o0,%o0
		faddd	%f44,%f60,%f24	! %f60
	or	%o2,%o0,%o0
		faddd	%f46,%f62,%f26	! %f62
	or	%o7,%o0,%o0		! 64-bit result
	ldx	[%l0],%o7
		faddd	%f52,%f56,%f52
	addcc	%o7,%o0,%o0
	! end-of-why?
		faddd	%f54,%f58,%f54
	srlx	%o3,16,%g1		! 34-bit carry
		fdtox	%f48,%f48
	bcs,a	%xcc,.+8
	add	%g1,1,%g1		! (annulled slot) carry++ if addcc overflowed

	fdtox	%f50,%f50
	fdtox	%f52,%f52
	fdtox	%f54,%f54

	std	%f48,[%sp+2047+192+0]
	std	%f50,[%sp+2047+192+8]
	addcc	%l6,8,%l6
	std	%f52,[%sp+2047+192+16]
	bz,pn	%icc,.Linnerskip
	std	%f54,[%sp+2047+192+24]	! (delay slot)

	ba	.Linner
	nop
.align	32
.Linner:
	ldd	[%l1+%l6],%f16		! load a[j] in double format
	ldd	[%l2+%l6],%f18
	ldd	[%l3+%l6],%f20		! load n[j] in double format
	ldd	[%l4+%l6],%f22

		fmuld	%f16,%f0,%f32
		fmuld	%f20,%f8,%f48
		fmuld	%f16,%f2,%f34
		fmuld	%f20,%f10,%f50
		fmuld	%f16,%f4,%f36
	ldx	[%sp+2047+192+0],%o0
		faddd	%f32,%f48,%f48
		fmuld	%f20,%f12,%f52
	ldx	[%sp+2047+192+8],%o1
		fmuld	%f16,%f6,%f38
	ldx	[%sp+2047+192+16],%o2
		faddd	%f34,%f50,%f50
		fmuld	%f20,%f14,%f54
	ldx	[%sp+2047+192+24],%o3
		fmuld	%f18,%f0,%f40

	srlx	%o0,16,%o7
		faddd	%f36,%f52,%f52
		fmuld	%f22,%f8,%f56
	add	%o7,%o1,%o1
		fmuld	%f18,%f2,%f42
	srlx	%o1,16,%o7
		faddd	%f38,%f54,%f54
		fmuld	%f22,%f10,%f58
	add	%o7,%o2,%o2
		fmuld	%f18,%f4,%f44
	srlx	%o2,16,%o7
		faddd	%f40,%f56,%f56
		fmuld	%f22,%f12,%f60
	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
	and	%o0,%l7,%o0
		fmuld	%f18,%f6,%f46
	and	%o1,%l7,%o1
	and	%o2,%l7,%o2
		faddd	%f42,%f58,%f58
		fmuld	%f22,%f14,%f62
	sllx	%o1,16,%o1
		faddd	%f24,%f48,%f48
	sllx	%o2,32,%o2
		faddd	%f26,%f50,%f50
	sllx	%o3,48,%o7
	or	%o1,%o0,%o0
		faddd	%f44,%f60,%f24	! %f60
	or	%o2,%o0,%o0
		faddd	%f46,%f62,%f26	! %f62
	or	%o7,%o0,%o0		! 64-bit result
		faddd	%f52,%f56,%f52
	addcc	%g1,%o0,%o0		! add carry from previous word
	ldx	[%l0+8],%o7		! tp[j]
		faddd	%f54,%f58,%f54
	srlx	%o3,16,%g1		! 34-bit carry
		fdtox	%f48,%f48
	bcs,a	%xcc,.+8
	add	%g1,1,%g1		! (annulled slot) carry++ if addcc overflowed
		fdtox	%f50,%f50
	addcc	%o7,%o0,%o0		! accumulate tp[j]
		fdtox	%f52,%f52
	bcs,a	%xcc,.+8
	add	%g1,1,%g1		! (annulled slot) carry++ if addcc overflowed

	stx	%o0,[%l0]		! tp[j-1]
		fdtox	%f54,%f54

	std	%f48,[%sp+2047+192+0]
	std	%f50,[%sp+2047+192+8]
	std	%f52,[%sp+2047+192+16]
	addcc	%l6,8,%l6
	std	%f54,[%sp+2047+192+24]
	bnz,pt	%icc,.Linner
	add	%l0,8,%l0		! (delay slot) tp++

.Linnerskip:
	fdtox	%f24,%f24
	fdtox	%f26,%f26

	ldx	[%sp+2047+192+0],%o0
	ldx	[%sp+2047+192+8],%o1
	ldx	[%sp+2047+192+16],%o2
	ldx	[%sp+2047+192+24],%o3

	srlx	%o0,16,%o7
	std	%f24,[%sp+2047+192+32]
	add	%o7,%o1,%o1
	std	%f26,[%sp+2047+192+40]
	srlx	%o1,16,%o7
	add	%o7,%o2,%o2
	srlx	%o2,16,%o7
	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
	and	%o0,%l7,%o0
	and	%o1,%l7,%o1
	and	%o2,%l7,%o2
	sllx	%o1,16,%o1
	sllx	%o2,32,%o2
	sllx	%o3,48,%o7
	or	%o1,%o0,%o0
	or	%o2,%o0,%o0
	ldx	[%sp+2047+192+32],%o4
	or	%o7,%o0,%o0		! 64-bit result
	ldx	[%sp+2047+192+40],%o5
	addcc	%g1,%o0,%o0
	ldx	[%l0+8],%o7		! tp[j]
	srlx	%o3,16,%g1		! 34-bit carry
	bcs,a	%xcc,.+8
	add	%g1,1,%g1		! (annulled slot) carry++ if addcc overflowed

	addcc	%o7,%o0,%o0
	bcs,a	%xcc,.+8
	add	%g1,1,%g1		! (annulled slot) carry++ if addcc overflowed

	stx	%o0,[%l0]		! tp[j-1]
	add	%l0,8,%l0

	srlx	%o4,16,%o7
	add	%o7,%o5,%o5
	and	%o4,%l7,%o4
	sllx	%o5,16,%o7
	or	%o7,%o4,%o4
	addcc	%g1,%o4,%o4
	srlx	%o5,48,%g1
	bcs,a	%xcc,.+8
	add	%g1,1,%g1		! (annulled slot) carry++ if addcc overflowed

	addcc	%i4,%o4,%o4		! add top-most carry of previous pass
	stx	%o4,[%l0]		! tp[num-1]
	mov	%g1,%i4
	bcs,a	%xcc,.+8
	add	%i4,1,%i4		! (annulled slot) new top-most carry++

	addcc	%l5,8,%l5		! i++
	bnz	%icc,.Louter
	nop

	add	%l0,8,%l0		! adjust tp to point at the end
	orn	%g0,%g0,%g4		! %g4=all ones; overwritten by subc after .Lsub
	sub	%g0,%i5,%o7		! n=-num
	ba	.Lsub
	subcc	%g0,%g0,%g0		! clear %icc.c

! rp = tp - np, one 32-bit word at a time with borrow kept in %icc.c
.align	32
.Lsub:
	ldx	[%l0+%o7],%o0
	add	%i3,%o7,%g1
	ld	[%g1+0],%o2
	ld	[%g1+4],%o3
	srlx	%o0,32,%o1
	subccc	%o0,%o2,%o2
	add	%i0,%o7,%g1
	subccc	%o1,%o3,%o3
	st	%o2,[%g1+0]
	add	%o7,8,%o7
	brnz,pt	%o7,.Lsub
	st	%o3,[%g1+4]		! (delay slot)
	subc	%i4,0,%g4		! %g4=top carry minus final borrow: select mask
	sub	%g0,%i5,%o7		! n=-num
	ba	.Lcopy
	nop

! rp = (tp & %g4) | (rp & ~%g4), without a data-dependent branch;
! tp is zeroed as it is consumed
.align	32
.Lcopy:
	ldx	[%l0+%o7],%o0
	add	%i0,%o7,%g1
	ld	[%g1+0],%o2
	ld	[%g1+4],%o3
	stx	%g0,[%l0+%o7]		! wipe tp
	and	%o0,%g4,%o0
	srlx	%o0,32,%o1
	andn	%o2,%g4,%o2
	andn	%o3,%g4,%o3
	or	%o2,%o0,%o0
	or	%o3,%o1,%o1
	st	%o0,[%g1+0]
	add	%o7,8,%o7
	brnz,pt	%o7,.Lcopy
	st	%o1,[%g1+4]		! (delay slot)
	sub	%g0,%i5,%o7		! n=-num

! wipe the double-format copies of a[] and n[]
.Lzap:
	stx	%g0,[%l1+%o7]
	stx	%g0,[%l2+%o7]
	stx	%g0,[%l3+%o7]
	stx	%g0,[%l4+%o7]
	add	%o7,8,%o7
	brnz,pt	%o7,.Lzap
	nop

	ldx	[%sp+2047+192+48],%o7
	wr	%g0,%o7,%asi		! restore %asi

	mov	1,%i0			! return 1: result is in rp
.Lret:
	ret
	restore
.type   bn_mul_mont_fpu,#function
.size	bn_mul_mont_fpu,(.-bn_mul_mont_fpu)
.asciz	"Montgomery Multipltication for UltraSPARC, CRYPTOGAMS by <appro@openssl.org>"
.align	32