#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for PA-RISC.

# June 2009.
#
# On PA-7100LC performance is >30% better than gcc 3.2 generated code
# for aligned input and >50% better for unaligned. Compared to vendor
# compiler on PA-8600 it's almost 60% faster in 64-bit build and just
# few percent faster in 32-bit one (this for aligned input, data for
# unaligned input is not available).
#
# Special thanks to polarhome.com for providing HP-UX account.

# Command line: <flavour> [<output>]
#   flavour - any value matching /64/ selects the 64-bit (2.0W) build,
#             everything else selects the 32-bit (1.0) build;
#   output  - file that receives the generated assembly.
$flavour = shift;
$output  = shift;

# Redirect STDOUT to the requested file. The open is checked so that a
# bad path aborts generation instead of silently producing nothing; when
# no output argument is given the inherited STDOUT is used as is.
if (defined $output) {
	open STDOUT,'>',$output or die "can't open $output: $!";
}

# ABI-dependent parameters: register width, frame layout and the
# mnemonics used to save/restore registers in prologue and epilogue.
if ($flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
} else {
	$LEVEL		="1.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
}

$FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker
				#                 [+ argument transfer]
$ctx="%r26";		# arg0
$inp="%r25";		# arg1
$num="%r24";		# arg2

$t0="%r28";
$t1="%r29";
$K="%r31";

# Message schedule registers. The 17th entry aliases $t0 and is only
# referenced by the input load/alignment code, which needs one word
# beyond the 16-word block when the input pointer is not word-aligned.
@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
    "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);

# Working variables; @V is rotated by the round loops below so that each
# round sees the registers in its own (a,b,c,d,e) order.
@V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23");

# Append the code for round $i of rounds 0..19 to $code.
#   $i              - round number
#   $a,$b,$c,$d,$e  - register names for this round's working variables
# Rounds 0..14 consume the loaded input words directly; from round 15 on
# the schedule word for round $i+1 is computed in the same instruction
# stream ("forward Xupdate").
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<15);
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	addl	@X[$i],$e,$e
	and	$c,$b,$t0
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	addl	$t0,$e,$e
___
$code.=<<___ if ($i>=15);	# with forward Xupdate
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	and	$c,$b,$t0
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	add	$t0,$e,$e
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
___
}

# Append the code for one xor-based round to $code; the driver below
# calls this for rounds 20..39 and again for rounds 60..79.
# Arguments are the same as for BODY_00_19.
# Round 79 has no further schedule word to prepare, so the free slots
# are used to reload the five context words into @X[0..4] for the final
# accumulation.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t0,$e,$e
___
$code.=<<___ if ($i==79);	# with context load
	ldw	0($ctx),@X[0]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	ldw	4($ctx),@X[1]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	ldw	8($ctx),@X[2]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	ldw	12($ctx),@X[3]
	addl	$t0,$e,$e
	ldw	16($ctx),@X[4]
___
}

# Append the code for round $i of rounds 40..59 to $code.
# Arguments are the same as for BODY_00_19.
# The round function is accumulated into $e as two separate terms,
# ($b & ($c ^ $d)) and ($c & $d), each added on its own.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___;
	shd	$a,$a,27,$t1	; $i
	addl	$K,$e,$e
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	xor	$d,$c,$t0
	addl	@X[$i%16],$e,$e
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	and	$b,$t0,$t0
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t0,$e,$e
	and	$d,$c,$t1
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t1,$e,$e
___
}

# Function prologue: save callee-saved registers, load the five context
# words and program the shift-amount register from the low two bits of
# the input pointer for the unaligned-input path.
$code=<<___;
	.LEVEL	$LEVEL
	.text

	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)

	ldw	0($ctx),$A
	ldw	4($ctx),$B
	ldw	8($ctx),$C
	ldw	12($ctx),$D
	ldw	16($ctx),$E

	extru	$inp,31,2,$t0		; t0=inp&3;
	sh3addl	$t0,%r0,$t0		; t0*=8;
	subi	32,$t0,$t0		; t0=32-t0;
	mtctl	$t0,%cr11		; %sar=t0;

L\$oop
	ldi	3,$t0
	andcm	$inp,$t0,$t0		; 64-bit neutral
___
	for ($i=0;$i<15;$i++) {		# load input block
	$code.="\tldw	`4*$i`($t0),@X[$i]\n";		}
# The 16th word is loaded on the line right after the branch; the 17th
# word and the vshd realignment pass below are emitted only on the
# fall-through (unaligned) path.
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	ldw	60($t0),@X[15]
	ldw	64($t0),@X[16]
___
	for ($i=0;$i<16;$i++) {		# align input
	$code.="\tvshd	@X[$i],@X[$i+1],@X[$i]\n";	}
$code.=<<___;
L\$aligned
	ldil	L'0x5a827000,$K		; K_00_19
	ldo	0x999($K),$K
___
# The four round loops share the global $i, so each one continues where
# the previous one stopped; @V is rotated after every round.
for ($i=0;$i<20;$i++)   { BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0x6ed9e000,$K		; K_20_39
	ldo	0xba1($K),$K
___

for (;$i<40;$i++)       { BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0x8f1bb000,$K		; K_40_59
	ldo	0xcdc($K),$K
___

for (;$i<60;$i++)       { BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0xca62c000,$K		; K_60_79
	ldo	0x1d6($K),$K
___
for (;$i<80;$i++)       { BODY_20_39($i,@V); unshift(@V,pop(@V)); }

# Fold the block result into the context (the old context words were
# reloaded into @X[0..4] by round 79), loop over the remaining blocks
# and restore the saved registers.
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stw	$A,0($ctx)
	stw	$B,4($ctx)
	stw	$C,8($ctx)
	stw	$D,12($ctx)
	stw	$E,16($ctx)
	addib,*<>	-1,$num,L\$oop
	ldo	64($inp),$inp

	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
___

# Post-processing of the generated text:
#  - evaluate every `...` expression (frame offsets) as Perl;
#  - 32-bit build: drop the "*" from ",*" completers;
#  - 64-bit build: replace "bv" with "bve".
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/,\*/,/gm if ($SIZE_T==4);
$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8);
print $code;
# Buffered write errors only surface at close time, so check it.
close STDOUT or die "error closing STDOUT: $!";