#!/usr/local/bin/perl

# It was noted that Intel IA-32 C compiler generates code which
# performs ~30% *faster* on P4 CPU than original *hand-coded*
# SHA1 assembler implementation. To address this problem (and
# prove that humans are still better than machines:-), the
# original code was overhauled, which resulted in following
# performance changes:
#
#               compared with original  compared with Intel cc
#               assembler impl.         generated code
# Pentium       -16%                    +48%
# PIII/AMD      +8%                     +16%
# P4            +85%(!)                 +45%
#
# As you can see Pentium came out as loser:-( Yet I reckoned that
# improvement on P4 outweighs the loss and incorporated this
# re-tuned code to 0.9.7 and later.
# ----------------------------------------------------------------
# Those who for any particular reason absolutely must score on
# Pentium can replace this module with one from 0.9.6 distribution.
# This "offer" shall be revoked the moment programming interface to
# this module is changed, in which case this paragraph should be
# removed.
# ----------------------------------------------------------------
#					<appro@fy.chalmers.se>

# When false (the value set here), sha1_block_data emits the "start"
# label that its per-block loop jumps back to.
$normal=0;

push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";

# The third argument is true when the last command-line argument is
# "386".  NOTE(review): presumably selects 386-compatible output in
# x86asm.pl -- confirm there.
&asm_init($ARGV[0],"sha1-586.pl",$ARGV[$#ARGV] eq "386");

# Register assignment for the five chaining variables, the rotating
# scratch register T and one extra temporary.
$A="eax";
$B="ecx";
$C="ebx";
$D="edx";
$E="edi";
$T="esi";
$tmp1="ebp";

# Not referenced anywhere else in this file.
$off=9*4;

# Additive round constants, one per group of 20 rounds.
@K=(0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6);

&sha1_block_data("sha1_block_asm_data_order");

&asm_finish();

# Nn(reg): maps a working-register name to its neighbour in the
# rotation A->T, B->A, C->B, D->C, E->D, T->E.  Returns undef for any
# other name.  Not called from within this file.
sub Nn
	{
	local($p)=@_;
	local(%n)=($A,$T,$B,$A,$C,$B,$D,$C,$E,$D,$T,$E);
	return($n{$p});
	}

# Np(reg): inverse of Nn -- maps A->B, B->C, C->D, D->E, E->T, T->A.
# Returns undef for any other name.  Not called from within this file.
# (An earlier copy of Nn's table used to be assigned to %n first and
# then immediately overwritten; that dead assignment has been removed.)
sub Np
	{
	local($p)=@_;
	local(%n)=($A,$B,$B,$C,$C,$D,$D,$E,$E,$T,$T,$A);
	return($n{$p});
	}

# Na(n): returns the five X-array slot numbers, each reduced modulo 16,
# that the BODY_16_19/20_39/40_59 subs use for round n:
# (n, n+2, n+8, n+13, n+1).  The first four are the words XORed together
# by the callers; the first is also the slot the result is written to.
sub Na
	{
	local($n)=@_;
	return(	(($n   )&0x0f),
		(($n+ 2)&0x0f),
		(($n+ 8)&0x0f),
		(($n+13)&0x0f),
		(($n+ 1)&0x0f));
	}

# X_expand(in): emits code that reads sixteen 32-bit words from the
# address held in register $in, byte-swaps each one and stores them in
# stack slots swtmp(0)..swtmp(15).  Uses $A and $B as scratch registers
# and the global $i as loop counter.
sub X_expand
	{
	local($in)=@_;

	&comment("First, load the words onto the stack in network byte order");
	for ($i=0; $i<16; $i+=2)
		{
		&mov($A,&DWP(($i+0)*4,$in,"",0));# unless $i == 0;
		 &mov($B,&DWP(($i+1)*4,$in,"",0));
		&bswap($A);
		 &bswap($B);
		&mov(&swtmp($i+0),$A);
		 &mov(&swtmp($i+1),$B);
		}

	&comment("We now have the X array on the stack");
	&comment("starting at sp-4");
	}

# Rules of engagement
# F is always trashable at the start, the running total.
# E becomes the next F so it can be trashed after it has been 'accumulated'
# F becomes A in the next round.  We don't need to access it much.
# During the X update part, the result ends up in $X[$n0].

# Calling convention shared by all BODY_* subs:
#   ($pos,$K,$X,$n,$a,$b,$c,$d,$e,$f)
#   $pos, $X  accepted for call-site symmetry; not used by the bodies
#   $K        additive constant for this round
#   $n        round number (0..79)
#   $a..$e    registers currently holding the five working variables
#   $f        register that receives the new value for this round
# All of them use the global $tmp1 as an extra scratch register.
# NOTE: the leading '&' on calls is required -- and/or/xor are Perl
# operators, so the helper subs of those names can only be reached
# through the &name(...) form.

# Rounds 0..15: message words are taken directly from swtmp($n).
# For every round except 15 the round result is left in $tmp1; the
# following round begins by copying $tmp1 into its $a register.  Round 0
# instead primes $tmp1 from $a, and round 15 accumulates into $f so that
# BODY_16_19 finds the value in a regular register.
sub BODY_00_15
	{
	local($pos,$K,$X,$n,$a,$b,$c,$d,$e,$f)=@_;

	&comment("00_15 $n");

	&mov($f,$c);			# f to hold F_00_19(b,c,d)
	 if ($n==0)  { &mov($tmp1,$a); }
	 else        { &mov($a,$tmp1); }
	&rotl($tmp1,5);			# tmp1=ROTATE(a,5)
	 &xor($f,$d);
	&and($f,$b);
	 &add($tmp1,$e);		# tmp1+=e;
	&mov($e,&swtmp($n));		# e becomes volatile and
					# is loaded with xi
	 &xor($f,$d);			# f holds F_00_19(b,c,d)
	&rotr($b,2);			# b=ROTATE(b,30)
	 &lea($tmp1,&DWP($K,$tmp1,$e,1));# tmp1+=K_00_19+xi

	if ($n==15) { &add($f,$tmp1); }	# f+=tmp1
	else        { &add($tmp1,$f); }
	}

# Rounds 16..19: same boolean function as rounds 0..15, but the message
# word is first recomputed from four earlier X slots (see Na) and
# written back to slot $n0.
sub BODY_16_19
	{
	local($pos,$K,$X,$n,$a,$b,$c,$d,$e,$f)=@_;
	local($n0,$n1,$n2,$n3,$np)=&Na($n);

	&comment("16_19 $n");

	&mov($f,&swtmp($n1));		# f to hold Xupdate(xi,xa,xb,xc,xd)
	 &mov($tmp1,$c);		# tmp1 to hold F_00_19(b,c,d)
	&xor($f,&swtmp($n0));
	 &xor($tmp1,$d);
	&xor($f,&swtmp($n2));
	 &and($tmp1,$b);		# tmp1 = (c^d)&b, one xor still to go
	&rotr($b,2);			# b=ROTATE(b,30)
	 &xor($f,&swtmp($n3));		# f holds xa^xb^xc^xd
	&rotl($f,1);			# f=ROTATE(f,1)
	 &xor($tmp1,$d);		# tmp1=F_00_19(b,c,d)
	&mov(&swtmp($n0),$f);		# xi=f
	&lea($f,&DWP($K,$f,$e,1));	# f+=K_00_19+e
	 &mov($e,$a);			# e becomes volatile
	&rotl($e,5);			# e=ROTATE(a,5)
	 &add($f,$tmp1);		# f+=F_00_19(b,c,d)
	&add($f,$e);			# f+=ROTATE(a,5)
	}

# Rounds 20..39 (and, via BODY_60_79, rounds 60..79): boolean function
# is b^c^d.  $e is folded into $tmp1 before $e is reused for
# ROTATE(a,5), so a single lea adds K, F and e to the new message word.
sub BODY_20_39
	{
	local($pos,$K,$X,$n,$a,$b,$c,$d,$e,$f)=@_;

	&comment("20_39 $n");
	local($n0,$n1,$n2,$n3,$np)=&Na($n);

	&mov($tmp1,$b);			# tmp1 to hold F_20_39(b,c,d)
	 &mov($f,&swtmp($n0));		# f to hold Xupdate(xi,xa,xb,xc,xd)
	&rotr($b,2);			# b=ROTATE(b,30)
	 &xor($f,&swtmp($n1));
	&xor($tmp1,$c);
	 &xor($f,&swtmp($n2));
	&xor($tmp1,$d);			# tmp1 holds F_20_39(b,c,d)
	 &xor($f,&swtmp($n3));		# f holds xa^xb^xc^xd
	&rotl($f,1);			# f=ROTATE(f,1)
	 &add($tmp1,$e);		# tmp1=F_20_39(b,c,d)+e
	&mov(&swtmp($n0),$f);		# xi=f
	 &mov($e,$a);			# e becomes volatile
	&rotl($e,5);			# e=ROTATE(a,5)
	 &lea($f,&DWP($K,$f,$tmp1,1));	# f+=K_20_39+F_20_39(b,c,d)+e
	&add($f,$e);			# f+=ROTATE(a,5)
	}

# Rounds 40..59: boolean function is ((b|c)&d)|(b&c).  The message
# words are XORed in through $tmp1 (register-to-register) here, and $e
# is borrowed as a second scratch register once it has been added to $f.
sub BODY_40_59
	{
	local($pos,$K,$X,$n,$a,$b,$c,$d,$e,$f)=@_;

	&comment("40_59 $n");
	local($n0,$n1,$n2,$n3,$np)=&Na($n);

	&mov($f,&swtmp($n0));		# f to hold Xupdate(xi,xa,xb,xc,xd)
	 &mov($tmp1,&swtmp($n1));
	&xor($f,$tmp1);
	 &mov($tmp1,&swtmp($n2));
	&xor($f,$tmp1);
	 &mov($tmp1,&swtmp($n3));
	&xor($f,$tmp1);			# f holds xa^xb^xc^xd
	 &mov($tmp1,$b);		# tmp1 to hold F_40_59(b,c,d)
	&rotl($f,1);			# f=ROTATE(f,1)
	 &or($tmp1,$c);
	&mov(&swtmp($n0),$f);		# xi=f
	 &and($tmp1,$d);
	&lea($f,&DWP($K,$f,$e,1));	# f+=K_40_59+e
	 &mov($e,$b);			# e becomes volatile and is used
					# to calculate F_40_59(b,c,d)
	&rotr($b,2);			# b=ROTATE(b,30)
	 &and($e,$c);
	&or($tmp1,$e);			# tmp1 holds F_40_59(b,c,d)
	 &mov($e,$a);
	&rotl($e,5);			# e=ROTATE(a,5)
	 &add($f,$tmp1);		# f+=tmp1;
	&add($f,$e);			# f+=ROTATE(a,5)
	}

# Rounds 60..79 emit exactly the same code as rounds 20..39; only the
# constant passed in by the caller differs.
sub BODY_60_79
	{
	&BODY_20_39(@_);
	}

# sha1_block_host(name, sclabel): emits function $name, which performs
# the same prologue as sha1_block_data, copies sixteen 32-bit words from
# the input to swtmp(0)..swtmp(15) WITHOUT byte-swapping, and then jumps
# to $sclabel (the "shortcut" label inside sha1_block_data).
sub sha1_block_host
	{
	local($name, $sclabel)=@_;

	&function_begin_B($name,"");

	# wparam(0) points at the hash context; the five chaining
	# words are read from these byte offsets:
	# A	0
	# B	4
	# C	8
	# D 	12
	# E 	16
	# wparam(1) is the input pointer, wparam(2) a count that is
	# multiplied by 64 and added to it to form the end pointer.

	&mov("ecx",	&wparam(2));
	 &push("esi");
	&shl("ecx",6);
	 &mov("esi",	&wparam(1));
	&push("ebp");
	 &add("ecx","esi");	# offset to leave on
	&push("ebx");
	 &mov("ebp",	&wparam(0));
	&push("edi");
	 &mov($D,	&DWP(12,"ebp","",0));
	&stack_push(18+9);
	 &mov($E,	&DWP(16,"ebp","",0));
	&mov($C,	&DWP( 8,"ebp","",0));
	 &mov(&swtmp(17),"ecx");	# end pointer, compared against at loop end

	&comment("First we need to setup the X array");

	for ($i=0; $i<16; $i+=2)
		{
		&mov($A,&DWP(($i+0)*4,"esi","",0));# unless $i == 0;
		 &mov($B,&DWP(($i+1)*4,"esi","",0));
		&mov(&swtmp($i+0),$A);
		 &mov(&swtmp($i+1),$B);
		}
	&jmp($sclabel);
	&function_end_B($name);
	}


# sha1_block_data(name): emits function $name.  The generated code
# loads C, D, E from the context, then for each 64-byte block:
# byte-swaps the block onto the stack (X_expand), loads A and B, runs
# the 80 unrolled rounds, adds the results back into the context,
# advances the input pointer by 64 and loops while it is below the end
# pointer saved in swtmp(17).  After emitting its own function it also
# emits the host-order entry point via sha1_block_host.
sub sha1_block_data
	{
	local($name)=@_;

	&function_begin_B($name,"");

	# wparam(0) points at the hash context; the five chaining
	# words are read from these byte offsets:
	# A	0
	# B	4
	# C	8
	# D 	12
	# E 	16
	# wparam(1) is the input pointer, wparam(2) a count that is
	# multiplied by 64 and added to it to form the end pointer.

	&mov("ecx",	&wparam(2));
	 &push("esi");
	&shl("ecx",6);
	 &mov("esi",	&wparam(1));
	&push("ebp");
	 &add("ecx","esi");	# offset to leave on
	&push("ebx");
	 &mov("ebp",	&wparam(0));
	&push("edi");
	 &mov($D,	&DWP(12,"ebp","",0));
	&stack_push(18+9);
	 &mov($E,	&DWP(16,"ebp","",0));
	&mov($C,	&DWP( 8,"ebp","",0));
	 &mov(&swtmp(17),"ecx");	# end pointer, compared against at loop end

	&comment("First we need to setup the X array");

	&set_label("start") unless $normal;

	&X_expand("esi");
	 &mov(&wparam(1),"esi");	# remember current input position

	&set_label("shortcut", 0, 1);
	&comment("");
	&comment("Start processing");

	# odd start
	&mov($A,	&DWP( 0,"ebp","",0));
	 &mov($B,	&DWP( 4,"ebp","",0));
	$X="esp";
	# The six registers rotate one position per round, so the
	# argument pattern repeats every six calls.
	&BODY_00_15(-2,$K[0],$X, 0,$A,$B,$C,$D,$E,$T);
	&BODY_00_15( 0,$K[0],$X, 1,$T,$A,$B,$C,$D,$E);
	&BODY_00_15( 0,$K[0],$X, 2,$E,$T,$A,$B,$C,$D);
	&BODY_00_15( 0,$K[0],$X, 3,$D,$E,$T,$A,$B,$C);
	&BODY_00_15( 0,$K[0],$X, 4,$C,$D,$E,$T,$A,$B);
	&BODY_00_15( 0,$K[0],$X, 5,$B,$C,$D,$E,$T,$A);
	&BODY_00_15( 0,$K[0],$X, 6,$A,$B,$C,$D,$E,$T);
	&BODY_00_15( 0,$K[0],$X, 7,$T,$A,$B,$C,$D,$E);
	&BODY_00_15( 0,$K[0],$X, 8,$E,$T,$A,$B,$C,$D);
	&BODY_00_15( 0,$K[0],$X, 9,$D,$E,$T,$A,$B,$C);
	&BODY_00_15( 0,$K[0],$X,10,$C,$D,$E,$T,$A,$B);
	&BODY_00_15( 0,$K[0],$X,11,$B,$C,$D,$E,$T,$A);
	&BODY_00_15( 0,$K[0],$X,12,$A,$B,$C,$D,$E,$T);
	&BODY_00_15( 0,$K[0],$X,13,$T,$A,$B,$C,$D,$E);
	&BODY_00_15( 0,$K[0],$X,14,$E,$T,$A,$B,$C,$D);
	&BODY_00_15( 1,$K[0],$X,15,$D,$E,$T,$A,$B,$C);
	&BODY_16_19(-1,$K[0],$X,16,$C,$D,$E,$T,$A,$B);
	&BODY_16_19( 0,$K[0],$X,17,$B,$C,$D,$E,$T,$A);
	&BODY_16_19( 0,$K[0],$X,18,$A,$B,$C,$D,$E,$T);
	&BODY_16_19( 1,$K[0],$X,19,$T,$A,$B,$C,$D,$E);

	&BODY_20_39(-1,$K[1],$X,20,$E,$T,$A,$B,$C,$D);
	&BODY_20_39( 0,$K[1],$X,21,$D,$E,$T,$A,$B,$C);
	&BODY_20_39( 0,$K[1],$X,22,$C,$D,$E,$T,$A,$B);
	&BODY_20_39( 0,$K[1],$X,23,$B,$C,$D,$E,$T,$A);
	&BODY_20_39( 0,$K[1],$X,24,$A,$B,$C,$D,$E,$T);
	&BODY_20_39( 0,$K[1],$X,25,$T,$A,$B,$C,$D,$E);
	&BODY_20_39( 0,$K[1],$X,26,$E,$T,$A,$B,$C,$D);
	&BODY_20_39( 0,$K[1],$X,27,$D,$E,$T,$A,$B,$C);
	&BODY_20_39( 0,$K[1],$X,28,$C,$D,$E,$T,$A,$B);
	&BODY_20_39( 0,$K[1],$X,29,$B,$C,$D,$E,$T,$A);
	&BODY_20_39( 0,$K[1],$X,30,$A,$B,$C,$D,$E,$T);
	&BODY_20_39( 0,$K[1],$X,31,$T,$A,$B,$C,$D,$E);
	&BODY_20_39( 0,$K[1],$X,32,$E,$T,$A,$B,$C,$D);
	&BODY_20_39( 0,$K[1],$X,33,$D,$E,$T,$A,$B,$C);
	&BODY_20_39( 0,$K[1],$X,34,$C,$D,$E,$T,$A,$B);
	&BODY_20_39( 0,$K[1],$X,35,$B,$C,$D,$E,$T,$A);
	&BODY_20_39( 0,$K[1],$X,36,$A,$B,$C,$D,$E,$T);
	&BODY_20_39( 0,$K[1],$X,37,$T,$A,$B,$C,$D,$E);
	&BODY_20_39( 0,$K[1],$X,38,$E,$T,$A,$B,$C,$D);
	&BODY_20_39( 1,$K[1],$X,39,$D,$E,$T,$A,$B,$C);

	&BODY_40_59(-1,$K[2],$X,40,$C,$D,$E,$T,$A,$B);
	&BODY_40_59( 0,$K[2],$X,41,$B,$C,$D,$E,$T,$A);
	&BODY_40_59( 0,$K[2],$X,42,$A,$B,$C,$D,$E,$T);
	&BODY_40_59( 0,$K[2],$X,43,$T,$A,$B,$C,$D,$E);
	&BODY_40_59( 0,$K[2],$X,44,$E,$T,$A,$B,$C,$D);
	&BODY_40_59( 0,$K[2],$X,45,$D,$E,$T,$A,$B,$C);
	&BODY_40_59( 0,$K[2],$X,46,$C,$D,$E,$T,$A,$B);
	&BODY_40_59( 0,$K[2],$X,47,$B,$C,$D,$E,$T,$A);
	&BODY_40_59( 0,$K[2],$X,48,$A,$B,$C,$D,$E,$T);
	&BODY_40_59( 0,$K[2],$X,49,$T,$A,$B,$C,$D,$E);
	&BODY_40_59( 0,$K[2],$X,50,$E,$T,$A,$B,$C,$D);
	&BODY_40_59( 0,$K[2],$X,51,$D,$E,$T,$A,$B,$C);
	&BODY_40_59( 0,$K[2],$X,52,$C,$D,$E,$T,$A,$B);
	&BODY_40_59( 0,$K[2],$X,53,$B,$C,$D,$E,$T,$A);
	&BODY_40_59( 0,$K[2],$X,54,$A,$B,$C,$D,$E,$T);
	&BODY_40_59( 0,$K[2],$X,55,$T,$A,$B,$C,$D,$E);
	&BODY_40_59( 0,$K[2],$X,56,$E,$T,$A,$B,$C,$D);
	&BODY_40_59( 0,$K[2],$X,57,$D,$E,$T,$A,$B,$C);
	&BODY_40_59( 0,$K[2],$X,58,$C,$D,$E,$T,$A,$B);
	&BODY_40_59( 1,$K[2],$X,59,$B,$C,$D,$E,$T,$A);

	&BODY_60_79(-1,$K[3],$X,60,$A,$B,$C,$D,$E,$T);
	&BODY_60_79( 0,$K[3],$X,61,$T,$A,$B,$C,$D,$E);
	&BODY_60_79( 0,$K[3],$X,62,$E,$T,$A,$B,$C,$D);
	&BODY_60_79( 0,$K[3],$X,63,$D,$E,$T,$A,$B,$C);
	&BODY_60_79( 0,$K[3],$X,64,$C,$D,$E,$T,$A,$B);
	&BODY_60_79( 0,$K[3],$X,65,$B,$C,$D,$E,$T,$A);
	&BODY_60_79( 0,$K[3],$X,66,$A,$B,$C,$D,$E,$T);
	&BODY_60_79( 0,$K[3],$X,67,$T,$A,$B,$C,$D,$E);
	&BODY_60_79( 0,$K[3],$X,68,$E,$T,$A,$B,$C,$D);
	&BODY_60_79( 0,$K[3],$X,69,$D,$E,$T,$A,$B,$C);
	&BODY_60_79( 0,$K[3],$X,70,$C,$D,$E,$T,$A,$B);
	&BODY_60_79( 0,$K[3],$X,71,$B,$C,$D,$E,$T,$A);
	&BODY_60_79( 0,$K[3],$X,72,$A,$B,$C,$D,$E,$T);
	&BODY_60_79( 0,$K[3],$X,73,$T,$A,$B,$C,$D,$E);
	&BODY_60_79( 0,$K[3],$X,74,$E,$T,$A,$B,$C,$D);
	&BODY_60_79( 0,$K[3],$X,75,$D,$E,$T,$A,$B,$C);
	&BODY_60_79( 0,$K[3],$X,76,$C,$D,$E,$T,$A,$B);
	&BODY_60_79( 0,$K[3],$X,77,$B,$C,$D,$E,$T,$A);
	&BODY_60_79( 0,$K[3],$X,78,$A,$B,$C,$D,$E,$T);
	&BODY_60_79( 2,$K[3],$X,79,$T,$A,$B,$C,$D,$E);

	&comment("End processing");
	&comment("");
	# D is the tmp value

	# After round 79 the working variables live in rotated
	# registers (register -> variable it now holds):
	# E	-> A
	# T	-> B
	# A	-> C
	# B	-> D
	# C	-> E
	# D	-> T

	&mov($tmp1,&wparam(0));

	 &mov($D,	&DWP(12,$tmp1,"",0));
	&add($D,$B);
	 &mov($B,	&DWP( 4,$tmp1,"",0));
	&add($B,$T);
	 &mov($T,	$A);		# park C's value; $A is reloaded next
	&mov($A,	&DWP( 0,$tmp1,"",0));
	 &mov(&DWP(12,$tmp1,"",0),$D);

	&add($A,$E);
	 &mov($E,	&DWP(16,$tmp1,"",0));
	&add($E,$C);
	 &mov($C,	&DWP( 8,$tmp1,"",0));
	&add($C,$T);

	 &mov(&DWP( 0,$tmp1,"",0),$A);
	&mov("esi",&wparam(1));		# input position saved before the rounds
	 &mov(&DWP( 8,$tmp1,"",0),$C);
	&add("esi",64);			# advance to the next 64-byte block
	 &mov("eax",&swtmp(17));	# end pointer
	&mov(&DWP(16,$tmp1,"",0),$E);
	 &cmp("esi","eax");
	&mov(&DWP( 4,$tmp1,"",0),$B);
	 &jb(&label("start"));		# more input: process another block

	&stack_pop(18+9);
	 &pop("edi");
	&pop("ebx");
	 &pop("ebp");
	&pop("esi");
	 &ret();

	# keep a note of shortcut label so it can be used outside
	# block.
	my $sclabel = &label("shortcut");

	&function_end_B($name);
	# Putting this here avoids problems with MASM in debugging mode
	&sha1_block_host("sha1_block_asm_host_order", $sclabel);
	}
