#!/usr/local/bin/perl
# x86 assembler: generators for the comba multiply and square routines.
#
# Every instruction is emitted through helper routines (&mov, &mul, &adc,
# &DWP, &wparam, &function_begin_B, ...) that this file does not define; the
# program that loads this file has to supply them.
#
# The helpers are always called with a leading "&".  Several of them (xor,
# push, pop) share their name with a Perl builtin, and without the sigil Perl
# would parse the call as that builtin.

use strict;
use warnings;

# Emit one multiply-accumulate step: (c2:c1:c0) += eax * edx.
#
# eax and edx must already hold the two operands when the emitted code runs.
#
# Arguments:
#   $ap, $bp        registers holding the addresses of a[] and b[]
#   $ai, $bi        indices of the words being multiplied (comment text only)
#   $c0, $c1, $c2   registers of the three-word accumulator, low to high
#   $pos            which loads/stores are interleaved with the arithmetic:
#                     0   load a[$na] into eax and b[$nb] into edx
#                     1   store $c0 to r[$i], then load a[$na] and b[$nb]
#                     >1  store $c0 to r[$i] only
#   $i              index of the result word written when $pos > 0
#   $na, $nb        indices of the next a[] / b[] words to pre-load
sub mul_add_c
{
    my ($ap, $ai, $bp, $bi, $c0, $c1, $c2, $pos, $i, $na, $nb) = @_;

    my $store     = ($pos > 0);
    my $load_next = ($pos == 0 || $pos == 1);

    &comment("mul a[$ai]*b[$bi]");

    # The loads and stores are deliberately interleaved with the add/adc
    # chain; keep the emission order exactly as it is.
    &mul("edx");
    &add($c0, "eax");
    &mov("eax", &DWP($na * 4, $ap, "", 0)) if $pos == 0;    # load next a
    &mov("eax", &wparam(0))                if $store;       # load r[]
    &adc($c1, "edx");
    &mov("edx", &DWP($nb * 4, $bp, "", 0)) if $load_next;   # load next b
    &adc($c2, 0);
    &mov(&DWP($i * 4, "eax", "", 0), $c0)  if $store;       # save r[i]
    &mov("eax", &DWP($na * 4, $ap, "", 0)) if $pos == 1;    # load next a
    return;
}

# Emit one accumulate step for a product that is counted once:
# (c2:c1:c0) += eax * eax when $ai == $bi, otherwise eax * edx.
#
# Arguments:
#   $rp, $ap        registers holding the addresses of r[] and a[]
#   $ai, $bi        indices of the words being multiplied
#   $c0, $c1, $c2   registers of the three-word accumulator, low to high
#   $pos            0:  load a[$na] into eax
#                   1:  store $c0 to r[$i], load a[$nb] into edx (only when
#                       $na != $nb) and a[$na] into eax
#                   >1: store $c0 to r[$i] only
#   $i              index of the result word written when $pos > 0
#   $na, $nb        indices of the next a[] words to pre-load
sub sqr_add_c
{
    my ($rp, $ap, $ai, $bi, $c0, $c1, $c2, $pos, $i, $na, $nb) = @_;

    &comment("sqr a[$ai]*a[$bi]");

    &mul($ai == $bi ? "eax" : "edx");
    &add($c0, "eax");
    &mov("eax", &DWP($na * 4, $ap, "", 0)) if $pos == 0;    # load next a
    &adc($c1, "edx");
    &mov("edx", &DWP($nb * 4, $ap, "", 0)) if $pos == 1 && $na != $nb;
    &adc($c2, 0);
    &mov(&DWP($i * 4, $rp, "", 0), $c0)    if $pos > 0;     # save r[i]
    &mov("eax", &DWP($na * 4, $ap, "", 0)) if $pos == 1;    # load next a
    return;
}

# Emit one accumulate step for a product that is counted twice:
# (c2:c1:c0) += 2 * (eax * edx), or 2 * (eax * eax) when $ai == $bi.
#
# Arguments are the same as for sqr_add_c.  Here a[$na] is loaded into eax
# when $pos is 0 or 1, and a[$nb] into edx when $pos <= 1 and $na != $nb.
sub sqr_add_c2
{
    my ($rp, $ap, $ai, $bi, $c0, $c1, $c2, $pos, $i, $na, $nb) = @_;

    &comment("sqr a[$ai]*a[$bi]");

    &mul($ai == $bi ? "eax" : "edx");

    # Double the 64-bit product in edx:eax; the bit shifted out of edx is
    # carried into the top accumulator word.
    &add("eax", "eax");
    &adc("edx", "edx");
    &adc($c2, 0);

    &add($c0, "eax");
    &adc($c1, "edx");
    &mov("eax", &DWP($na * 4, $ap, "", 0)) if $pos == 0 || $pos == 1;
    &adc($c2, 0);
    &mov(&DWP($i * 4, $rp, "", 0), $c0)    if $pos > 0;     # save r[i]
    &mov("edx", &DWP($nb * 4, $ap, "", 0)) if $pos <= 1 && $na != $nb;
    return;
}

# Emit a fully unrolled function $name that multiplies two $num-word numbers.
#
# The emitted code reads a[] through &wparam(1), b[] through &wparam(2) and
# writes the result through &wparam(0).  Result words are produced one column
# at a time; three registers form a rotating accumulator (c0 is the word
# currently being completed, c1/c2 collect the carries into the next two).
sub bn_mul_comba
{
    my ($name, $num) = @_;

    my ($c0, $c1, $c2) = ("ebx", "ecx", "ebp");
    my ($ap, $bp)      = ("esi", "edi");
    my $tot            = $num + $num - 1;   # number of columns
    my ($as, $bs, $be) = (0, 0, 0);         # first a index, first/last b index

    &function_begin_B($name, "");

    &push("esi");
    &mov($ap, &wparam(1));
    &push("edi");
    &mov($bp, &wparam(2));
    &push("ebp");
    &push("ebx");

    &xor($c0, $c0);
    &mov("eax", &DWP(0, $ap, "", 0));       # load the first word of a
    &xor($c1, $c1);
    &mov("edx", &DWP(0, $bp, "", 0));       # load the first word of b

    my $i;                                  # still needed after the loop
    for ($i = 0; $i < $tot; $i++) {
        # 1 while the columns are still getting longer, 0 afterwards.
        my $rising = ($i < $num - 1) ? 1 : 0;

        my $ai  = $as;
        my $bi  = $bs;
        my $end = $be + 1;

        &comment("################## Calculate word $i");

        for (my $j = $bs; $j < $end; $j++) {
            &xor($c2, $c2) if $j == $bs;

            my ($v, $na, $nb);
            if ($j + 1 == $end) {
                # Last product of this column: the word gets stored and the
                # operands for the start of the next column are pre-loaded.
                $v  = ($i + 1 == $tot) ? 2 : 1;
                $na = $as + $rising;
                $nb = $bs + (1 - $rising);
            }
            else {
                $v  = 0;
                $na = $ai - 1;
                $nb = $bi + 1;
            }

            &mul_add_c($ap, $ai, $bp, $bi, $c0, $c1, $c2, $v, $i, $na, $nb);
            if ($v) {
                &comment("saved r[$i]");
                ($c0, $c1, $c2) = ($c1, $c2, $c0);      # rotate accumulator
            }
            $ai--;
            $bi++;
        }

        $as += $rising;
        $be += $rising;
        $bs += 1 - $rising;
    }

    # eax still holds the r[] pointer loaded by the final mul_add_c call.
    &comment("save r[$i]");
    &mov(&DWP($i * 4, "eax", "", 0), $c0);

    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
    return;
}

# Emit a fully unrolled function $name that squares a $num-word number.
#
# The emitted code reads a[] through &wparam(1) and writes the result through
# &wparam(0).  Each column is walked from its outer pair of indices inwards
# and stops once the indices meet or cross: off-diagonal products are emitted
# once and doubled (sqr_add_c2), the diagonal product is added once
# (sqr_add_c).
sub bn_sqr_comba
{
    my ($name, $num) = @_;

    my ($c0, $c1, $c2) = ("ebx", "ecx", "ebp");
    my ($ap, $rp)      = ("esi", "edi");

    &function_begin_B($name, "");

    &push("esi");
    &push("edi");
    &push("ebp");
    &push("ebx");
    &mov($rp, &wparam(0));
    &mov($ap, &wparam(1));
    &xor($c0, $c0);
    &xor($c1, $c1);
    &mov("eax", &DWP(0, $ap, "", 0));       # load the first word

    my ($as, $bs, $be) = (0, 0, 0);
    my $tot            = $num + $num - 1;   # number of columns

    my $i;                                  # still needed after the loop
    for ($i = 0; $i < $tot; $i++) {
        # 1 while the columns are still getting longer, 0 afterwards.
        my $rising = ($i < $num - 1) ? 1 : 0;

        my $ai  = $as;
        my $bi  = $bs;
        my $end = $be + 1;

        &comment("############### Calculate word $i");
        for (my $j = $bs; $j < $end; $j++) {
            &xor($c2, $c2) if $j == $bs;

            my ($v, $na, $nb);
            if ($ai - 1 < $bi + 1) {
                # The next pair would cross the diagonal, so this is the
                # last product of the column.
                $v  = ($i + 1 == $tot) ? 2 : 1;
                $na = $as + $rising;
                $nb = $bs + (1 - $rising);
            }
            else {
                $v  = 0;
                $na = $ai - 1;
                $nb = $bi + 1;
            }

            if ($ai == $bi) {
                &sqr_add_c($rp, $ap, $ai, $bi,
                           $c0, $c1, $c2, $v, $i, $na, $nb);
            }
            else {
                &sqr_add_c2($rp, $ap, $ai, $bi,
                            $c0, $c1, $c2, $v, $i, $na, $nb);
            }

            if ($v) {
                &comment("saved r[$i]");
                ($c0, $c1, $c2) = ($c1, $c2, $c0);      # rotate accumulator
                last;
            }
            $ai--;
            $bi++;
        }

        $as += $rising;
        $be += $rising;
        $bs += 1 - $rising;
    }

    &mov(&DWP($i * 4, $rp, "", 0), $c0);
    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
    return;
}

1;