! bn_mul_mont_int -- word-by-word Montgomery multiplication for SPARCv9.
!
! Target/ABI: SPARC V9.  The 2047 stack bias and the 192-byte frame used
! below correspond to the 64-bit ABI.
!
! Presumed C prototype (argument order is inferred from the register use
! below -- TODO confirm against the C caller):
!	int bn_mul_mont_int(rp, ap, bp, np, n0p, num)
!
! Arguments, named as seen after the register-window save:
!	%i0  rp   destination vector, num words
!	%i1  ap   first operand
!	%i2  bp   second operand; bp == ap selects the squaring path
!	%i3  np   modulus words (np[] is subtracted from the result in .Lsub)
!	%i4  n0p  pointer to the word n0; replaced by the loaded value at entry
!	%i5  num  word count; turned into a byte count at entry
! Every vector access is a 32-bit ld/st, i.e. all vectors are arrays of
! 32-bit words.
!
! Return value: 0 (rp untouched) when num < 4, otherwise 1.
!
! Working registers:
!	%g1          0xffffffff, mask for the low 32 bits of a 64-bit sum
!	%l2          multiplier word of the current pass (bp[i], or ap[i]
!	             on the squaring path)
!	%l3          m = low 32 bits of t[0]*n0 for the current pass
!	%l5 %l6 %l7  prefetched ap[j], np[j], tp[j]
!	%o0          running sum/carry of the ap[j]*multiplier chain
!	%o1          running sum/carry of the np[j]*m chain
!	%o2          topmost overflow word carried from pass to pass
!	%o3 %o4      products consumed in the current step
!	%g4 %g5      products computed one step ahead (software pipelining)
!	%l0 %l1      i and j, kept as byte offsets
!	%l4          tp, pointer into the scratch vector at %sp+2047+192
!	%i2          squaring path only: bit shifted out when a cross
!	             product is doubled (bp is not needed there)
!
! Note that SPARC branches have a delay slot: the instruction following
! a branch executes before the branch target is reached.

.section	".text",#alloc,#execinstr

.global	bn_mul_mont_int
.align	32
bn_mul_mont_int:
	cmp	%o5,4			! 128 bits minimum
	bge,pt	%icc,.Lenter
	sethi	%hi(0xffffffff),%g1	! delay slot: upper part of the mask
	retl				! num < 4: return 0
	clr	%o0
.align	32
.Lenter:
	save	%sp,-192,%sp
	sll	%i5,2,%i5		! num*=4
	or	%g1,%lo(0xffffffff),%g1	! %g1 = 0xffffffff
	ld	[%i4],%i4		! n0
	cmp	%i1,%i2			! ap == bp?  consumed by be below
	and	%i5,%g1,%i5		! keep only the low 32 bits of num*4
	ld	[%i2],%l2		! bp[0]
	nop

	! Carve the scratch vector tp out of the stack: lower the unbiased
	! stack pointer by num bytes and round it down to a 1024-byte
	! boundary.  The loads are interleaved to prefetch the first words.
	add	%sp,2047,%o7		! real top of stack
	ld	[%i1],%o0		! ap[0] ! redundant in squaring context
	sub	%o7,%i5,%o7
	ld	[%i1+4],%l5		! ap[1]
	and	%o7,-1024,%o7
	ld	[%i3],%o1		! np[0]
	sub	%o7,2047,%sp		! alloca
	ld	[%i3+4],%l6		! np[1]
	be,pt	%xcc,.Lbn_sqr_mont	! ap == bp: dedicated squaring code
	mov	12,%l1			! delay slot: j = 3 words, as byte offset

	! ---- multiplication, first pass: tp = (ap*bp[0] + np*m) >> 32 ----
	mulx	%o0,%l2,%o0		! ap[0]*bp[0]
	mulx	%l5,%l2,%g4		!prologue! ap[1]*bp[0]
	and	%o0,%g1,%o3
	add	%sp,2047+192,%l4	! tp = start of the scratch vector
	ld	[%i1+8],%l5		!prologue!

	mulx	%i4,%o3,%l3		! "t[0]"*n0
	and	%l3,%g1,%l3		! m, reduced to 32 bits

	mulx	%o1,%l3,%o1		! np[0]*"t[0]"*n0
	mulx	%l6,%l3,%o4		!prologue! np[1]*"t[0]"*n0
	srlx	%o0,32,%o0
	add	%o3,%o1,%o1		! lowest word of the sum is not stored,
	ld	[%i3+8],%l6		!prologue!
	srlx	%o1,32,%o1		! only its carry is kept
	mov	%g4,%o3			!prologue!

.L1st:
	mulx	%l5,%l2,%g4		! products for the next step
	mulx	%l6,%l3,%g5
	add	%o3,%o0,%o0
	ld	[%i1+%l1],%l5		! ap[j]
	and	%o0,%g1,%o3
	add	%o4,%o1,%o1
	ld	[%i3+%l1],%l6		! np[j]
	srlx	%o0,32,%o0
	add	%o3,%o1,%o1
	add	%l1,4,%l1		! j++
	mov	%g4,%o3
	st	%o1,[%l4]
	cmp	%l1,%i5
	mov	%g5,%o4
	srlx	%o1,32,%o1
	bl	%icc,.L1st
	add	%l4,4,%l4		! tp++
!.L1st

	! Drain the pipeline: two products are still pending.
	mulx	%l5,%l2,%g4		!epilogue!
	mulx	%l6,%l3,%g5
	add	%o3,%o0,%o0
	and	%o0,%g1,%o3
	add	%o4,%o1,%o1
	srlx	%o0,32,%o0
	add	%o3,%o1,%o1
	st	%o1,[%l4]
	srlx	%o1,32,%o1

	add	%g4,%o0,%o0
	and	%o0,%g1,%o3
	add	%g5,%o1,%o1
	srlx	%o0,32,%o0
	add	%o3,%o1,%o1
	st	%o1,[%l4+4]
	srlx	%o1,32,%o1

	add	%o0,%o1,%o1		! merge both carry chains
	st	%o1,[%l4+8]
	srlx	%o1,32,%o2		! topmost overflow word

	mov	4,%l0			! i++
	ld	[%i2+4],%l2		! bp[1]

	! ---- multiplication, remaining passes: tp = (tp + ap*bp[i] + np*m) >> 32
.Louter:
	add	%sp,2047+192,%l4	! rewind tp
	ld	[%i1],%o0		! ap[0]
	ld	[%i1+4],%l5		! ap[1]
	ld	[%i3],%o1		! np[0]
	ld	[%i3+4],%l6		! np[1]
	ld	[%l4],%g5		! tp[0]
	ld	[%l4+4],%l7		! tp[1]
	mov	12,%l1

	mulx	%o0,%l2,%o0
	mulx	%l5,%l2,%g4		!prologue!
	add	%g5,%o0,%o0
	ld	[%i1+8],%l5		!prologue!
	and	%o0,%g1,%o3

	mulx	%i4,%o3,%l3		! m for this pass
	and	%l3,%g1,%l3

	mulx	%o1,%l3,%o1
	mulx	%l6,%l3,%o4		!prologue!
	srlx	%o0,32,%o0
	add	%o3,%o1,%o1
	ld	[%i3+8],%l6		!prologue!
	srlx	%o1,32,%o1
	mov	%g4,%o3			!prologue!

.Linner:
	mulx	%l5,%l2,%g4
	mulx	%l6,%l3,%g5
	add	%l7,%o0,%o0
	ld	[%i1+%l1],%l5		! ap[j]
	add	%o3,%o0,%o0
	add	%o4,%o1,%o1
	ld	[%i3+%l1],%l6		! np[j]
	and	%o0,%g1,%o3
	ld	[%l4+8],%l7		! tp[j]
	srlx	%o0,32,%o0
	add	%o3,%o1,%o1
	add	%l1,4,%l1		! j++
	mov	%g4,%o3
	st	%o1,[%l4]		! tp[j-1]
	srlx	%o1,32,%o1
	mov	%g5,%o4
	cmp	%l1,%i5
	bl	%icc,.Linner
	add	%l4,4,%l4		! tp++
!.Linner

	mulx	%l5,%l2,%g4		!epilogue!
	mulx	%l6,%l3,%g5
	add	%l7,%o0,%o0
	add	%o3,%o0,%o0
	ld	[%l4+8],%l7		! tp[j]
	and	%o0,%g1,%o3
	add	%o4,%o1,%o1
	srlx	%o0,32,%o0
	add	%o3,%o1,%o1
	st	%o1,[%l4]		! tp[j-1]
	srlx	%o1,32,%o1

	add	%l7,%o0,%o0
	add	%g4,%o0,%o0
	and	%o0,%g1,%o3
	add	%g5,%o1,%o1
	add	%o3,%o1,%o1
	st	%o1,[%l4+4]		! tp[j-1]
	srlx	%o0,32,%o0
	add	%l0,4,%l0		! i++
	srlx	%o1,32,%o1

	add	%o0,%o1,%o1
	cmp	%l0,%i5
	add	%o2,%o1,%o1		! fold in the previous overflow word
	st	%o1,[%l4+8]

	srlx	%o1,32,%o2
	bl,a	%icc,.Louter
	ld	[%i2+%l0],%l2		! bp[i]  (annulled slot: only if taken)
!.Louter

	add	%l4,12,%l4		! tp now points just past word num-1

	! ---- common tail: rp = tp - np, then pick tp or rp as the result ----
	! On entry %l4 must point just past the last of the num result words.
.Ltail:
	add	%i3,%i5,%i3		! np, rp and tp all point past the end,
	add	%i0,%i5,%i0		! indexed with a negative offset
	mov	%l4,%i1
	sub	%g0,%i5,%o7		! k=-num
	ba	.Lsub
	subcc	%g0,%g0,%g0		! clear %icc.c
.align	16
.Lsub:
	ld	[%l4+%o7],%o0
	ld	[%i3+%o7],%o1
	subccc	%o0,%o1,%o1		! tp[j]-np[j]
	add	%i0,%o7,%l0		! add and brnz leave the borrow in
	add	%o7,4,%o7		! %icc.c intact for the next round
	brnz	%o7,.Lsub
	st	%o1,[%l0]
	subc	%o2,0,%o2		! handle upmost overflow bit
	and	%l4,%o2,%i1		! select the source by mask: tp where
	andn	%i0,%o2,%i3		! %o2 has bits set, rp where it is clear
	or	%i1,%i3,%i1
	sub	%g0,%i5,%o7

.Lcopy:
	ld	[%i1+%o7],%o0		! copy or in-place refresh
	st	%g0,[%l4+%o7]		! zap tp
	st	%o0,[%i0+%o7]
	add	%o7,4,%o7
	brnz	%o7,.Lcopy
	nop
	mov	1,%i0			! return 1
	ret
	restore

	! ---- squaring (ap == bp) ----
	! Each cross product ap[j]*ap[i], j > i, is computed once and
	! doubled.  %i2 holds the bit shifted out by the doubling and the
	! high half of the running product is kept halved in %o0 meanwhile.
.align	32
.Lbn_sqr_mont:
	mulx	%l2,%l2,%o0		! ap[0]*ap[0]
	mulx	%l5,%l2,%g4		!prologue!
	and	%o0,%g1,%o3
	add	%sp,2047+192,%l4	! tp = start of the scratch vector
	ld	[%i1+8],%l5		!prologue!

	mulx	%i4,%o3,%l3		! "t[0]"*n0
	srlx	%o0,32,%o0
	and	%l3,%g1,%l3

	mulx	%o1,%l3,%o1		! np[0]*"t[0]"*n0
	mulx	%l6,%l3,%o4		!prologue!
	and	%o0,1,%i2		! save the low bit, then halve %o0
	ld	[%i3+8],%l6		!prologue!
	srlx	%o0,1,%o0
	add	%o3,%o1,%o1
	srlx	%o1,32,%o1
	mov	%g4,%o3			!prologue!

.Lsqr_1st:
	mulx	%l5,%l2,%g4
	mulx	%l6,%l3,%g5
	add	%o3,%o0,%o0		! ap[j]*a0+c0
	add	%o4,%o1,%o1
	ld	[%i1+%l1],%l5		! ap[j]
	and	%o0,%g1,%o3
	ld	[%i3+%l1],%l6		! np[j]
	srlx	%o0,32,%o0
	add	%o3,%o3,%o3		! double the cross term ...
	or	%i2,%o3,%o3		! ... and bring in the saved bit
	mov	%g5,%o4
	srlx	%o3,32,%i2		! bit shifted out, for the next word
	add	%l1,4,%l1		! j++
	and	%o3,%g1,%o3
	cmp	%l1,%i5
	add	%o3,%o1,%o1
	st	%o1,[%l4]
	mov	%g4,%o3
	srlx	%o1,32,%o1
	bl	%icc,.Lsqr_1st
	add	%l4,4,%l4		! tp++
!.Lsqr_1st

	mulx	%l5,%l2,%g4		! epilogue
	mulx	%l6,%l3,%g5
	add	%o3,%o0,%o0		! ap[j]*a0+c0
	add	%o4,%o1,%o1
	and	%o0,%g1,%o3
	srlx	%o0,32,%o0
	add	%o3,%o3,%o3
	or	%i2,%o3,%o3
	srlx	%o3,32,%i2
	and	%o3,%g1,%o3
	add	%o3,%o1,%o1
	st	%o1,[%l4]
	srlx	%o1,32,%o1

	add	%g4,%o0,%o0		! ap[j]*a0+c0
	add	%g5,%o1,%o1
	and	%o0,%g1,%o3
	srlx	%o0,32,%o0
	add	%o3,%o3,%o3
	or	%i2,%o3,%o3
	srlx	%o3,32,%i2
	and	%o3,%g1,%o3
	add	%o3,%o1,%o1
	st	%o1,[%l4+4]
	srlx	%o1,32,%o1

	add	%o0,%o0,%o0		! double the remaining carry as well
	or	%i2,%o0,%o0
	add	%o0,%o1,%o1
	st	%o1,[%l4+8]
	srlx	%o1,32,%o2		! topmost overflow word

	! Second pass (multiplier ap[1]), unrolled separately.
	ld	[%sp+2047+192],%g4	! tp[0]
	ld	[%sp+2047+192+4],%g5	! tp[1]
	ld	[%sp+2047+192+8],%l7	! tp[2]
	ld	[%i1+4],%l2		! ap[1]
	ld	[%i1+8],%l5		! ap[2]
	ld	[%i3],%o1		! np[0]
	ld	[%i3+4],%l6		! np[1]
	mulx	%i4,%g4,%l3

	mulx	%l2,%l2,%o0
	and	%l3,%g1,%l3

	mulx	%o1,%l3,%o1
	mulx	%l6,%l3,%o4
	add	%g4,%o1,%o1
	and	%o0,%g1,%o3
	ld	[%i3+8],%l6		! np[2]
	srlx	%o1,32,%o1
	add	%g5,%o1,%o1
	srlx	%o0,32,%o0
	add	%o3,%o1,%o1
	and	%o0,1,%i2
	add	%o4,%o1,%o1
	srlx	%o0,1,%o0
	mov	12,%l1
	st	%o1,[%sp+2047+192]	! tp[0]=
	srlx	%o1,32,%o1
	add	%sp,2047+192+4,%l4

.Lsqr_2nd:
	mulx	%l5,%l2,%o3
	mulx	%l6,%l3,%o4
	add	%o3,%o0,%o0
	add	%l7,%o1,%o1
	ld	[%i1+%l1],%l5		! ap[j]
	and	%o0,%g1,%o3
	ld	[%i3+%l1],%l6		! np[j]
	srlx	%o0,32,%o0
	add	%o4,%o1,%o1
	ld	[%l4+8],%l7		! tp[j]
	add	%o3,%o3,%o3
	add	%l1,4,%l1		! j++
	or	%i2,%o3,%o3
	srlx	%o3,32,%i2
	and	%o3,%g1,%o3
	cmp	%l1,%i5
	add	%o3,%o1,%o1
	st	%o1,[%l4]		! tp[j-1]
	srlx	%o1,32,%o1
	bl	%icc,.Lsqr_2nd
	add	%l4,4,%l4		! tp++
!.Lsqr_2nd

	mulx	%l5,%l2,%o3
	mulx	%l6,%l3,%o4
	add	%o3,%o0,%o0
	add	%l7,%o1,%o1
	and	%o0,%g1,%o3
	srlx	%o0,32,%o0
	add	%o4,%o1,%o1
	add	%o3,%o3,%o3
	or	%i2,%o3,%o3
	srlx	%o3,32,%i2
	and	%o3,%g1,%o3
	add	%o3,%o1,%o1
	st	%o1,[%l4]		! tp[j-1]
	srlx	%o1,32,%o1

	add	%o0,%o0,%o0
	or	%i2,%o0,%o0
	add	%o0,%o1,%o1
	add	%o2,%o1,%o1
	st	%o1,[%l4+4]
	srlx	%o1,32,%o2

	! Set-up for the generic outer loop, starting with multiplier ap[2].
	ld	[%sp+2047+192],%g5	! tp[0]
	ld	[%sp+2047+192+4],%l7	! tp[1]
	ld	[%i1+8],%l2		! ap[2]
	ld	[%i3],%o1		! np[0]
	ld	[%i3+4],%l6		! np[1]
	mulx	%i4,%g5,%l3
	and	%l3,%g1,%l3
	mov	8,%l0			! i = 2 words, as byte offset

	mulx	%l2,%l2,%o0
	mulx	%o1,%l3,%o1
	and	%o0,%g1,%o3
	add	%g5,%o1,%o1
	srlx	%o0,32,%o0
	add	%sp,2047+192,%l4
	srlx	%o1,32,%o1
	and	%o0,1,%i2
	srlx	%o0,1,%o0
	mov	4,%l1

.Lsqr_outer:
	! Words below index i only take the np[j]*m term in this pass.
.Lsqr_inner1:
	mulx	%l6,%l3,%o4
	add	%l7,%o1,%o1
	add	%l1,4,%l1
	ld	[%l4+8],%l7
	cmp	%l1,%l0
	add	%o4,%o1,%o1
	ld	[%i3+%l1],%l6
	st	%o1,[%l4]
	srlx	%o1,32,%o1
	bl	%icc,.Lsqr_inner1
	add	%l4,4,%l4
!.Lsqr_inner1

	! Word i: add the low half of ap[i]*ap[i] held in %o3.
	add	%l1,4,%l1
	ld	[%i1+%l1],%l5		! ap[j]
	mulx	%l6,%l3,%o4
	add	%l7,%o1,%o1
	ld	[%i3+%l1],%l6		! np[j]
	add	%o3,%o1,%o1
	ld	[%l4+8],%l7		! tp[j]
	add	%o4,%o1,%o1
	st	%o1,[%l4]
	srlx	%o1,32,%o1

	add	%l1,4,%l1
	cmp	%l1,%i5
	be,pn	%icc,.Lsqr_no_inner2
	add	%l4,4,%l4

	! Words above index i: doubled cross products plus np[j]*m.
.Lsqr_inner2:
	mulx	%l5,%l2,%o3
	mulx	%l6,%l3,%o4
	add	%l7,%o1,%o1
	add	%o3,%o0,%o0
	ld	[%i1+%l1],%l5		! ap[j]
	and	%o0,%g1,%o3
	ld	[%i3+%l1],%l6		! np[j]
	srlx	%o0,32,%o0
	add	%o3,%o3,%o3
	ld	[%l4+8],%l7		! tp[j]
	or	%i2,%o3,%o3
	add	%l1,4,%l1		! j++
	srlx	%o3,32,%i2
	and	%o3,%g1,%o3
	cmp	%l1,%i5
	add	%o3,%o1,%o1
	add	%o4,%o1,%o1
	st	%o1,[%l4]		! tp[j-1]
	srlx	%o1,32,%o1
	bl	%icc,.Lsqr_inner2
	add	%l4,4,%l4		! tp++

.Lsqr_no_inner2:
	mulx	%l5,%l2,%o3
	mulx	%l6,%l3,%o4
	add	%l7,%o1,%o1
	add	%o3,%o0,%o0
	and	%o0,%g1,%o3
	srlx	%o0,32,%o0
	add	%o3,%o3,%o3
	or	%i2,%o3,%o3
	srlx	%o3,32,%i2
	and	%o3,%g1,%o3
	add	%o3,%o1,%o1
	add	%o4,%o1,%o1
	st	%o1,[%l4]		! tp[j-1]
	srlx	%o1,32,%o1

	add	%o0,%o0,%o0
	or	%i2,%o0,%o0
	add	%o0,%o1,%o1
	add	%o2,%o1,%o1
	st	%o1,[%l4+4]
	srlx	%o1,32,%o2

	! Set-up for the next pass.
	add	%l0,4,%l0		! i++
	ld	[%sp+2047+192],%g5	! tp[0]
	ld	[%sp+2047+192+4],%l7	! tp[1]
	ld	[%i1+%l0],%l2		! ap[j]
	ld	[%i3],%o1		! np[0]
	ld	[%i3+4],%l6		! np[1]
	mulx	%i4,%g5,%l3
	and	%l3,%g1,%l3
	add	%l0,4,%g4

	mulx	%l2,%l2,%o0
	mulx	%o1,%l3,%o1
	and	%o0,%g1,%o3
	add	%g5,%o1,%o1
	srlx	%o0,32,%o0
	add	%sp,2047+192,%l4
	srlx	%o1,32,%o1
	and	%o0,1,%i2
	srlx	%o0,1,%o0

	cmp	%g4,%i5			! i<num-1
	bl	%icc,.Lsqr_outer
	mov	4,%l1

	! Last pass: no cross products are left, only np[j]*m and the square.
.Lsqr_last:
	mulx	%l6,%l3,%o4
	add	%l7,%o1,%o1
	add	%l1,4,%l1
	ld	[%l4+8],%l7
	cmp	%l1,%l0
	add	%o4,%o1,%o1
	ld	[%i3+%l1],%l6
	st	%o1,[%l4]
	srlx	%o1,32,%o1
	bl	%icc,.Lsqr_last
	add	%l4,4,%l4
!.Lsqr_last

	mulx	%l6,%l3,%o4
	add	%l7,%o1,%o1
	add	%o3,%o1,%o1
	add	%o4,%o1,%o1
	st	%o1,[%l4]
	srlx	%o1,32,%o1

	add	%o0,%o0,%o0		! recover %o0
	or	%i2,%o0,%o0
	add	%o0,%o1,%o1
	add	%o2,%o1,%o1
	st	%o1,[%l4+4]
	srlx	%o1,32,%o2

	ba	.Ltail
	add	%l4,8,%l4		! delay slot: tp past the last word
.type	bn_mul_mont_int,#function
.size	bn_mul_mont_int,(.-bn_mul_mont_int)
! NOTE(review): the identification string below misspells "Multiplication".
! It is emitted into the object file, so it is deliberately left unchanged.
.asciz	"Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro@openssl.org>"
.align	32