#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for MIPS.

# Performance improvement is 30% on unaligned input. The "secret" is
# to deploy lwl/lwr pair to load unaligned input. One could have
# vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32-
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...
#
# The code is somewhat IRIX-centric, i.e. is likely to require minor
# adaptations for other OSes...

# Target endianness: -DB_ENDIAN / -DL_ENDIAN on the command line wins
# (the last matching argument takes effect); otherwise fall back to the
# byte order of the machine running this script.
for (@ARGV) {
	$big_endian=1 if (/\-DB_ENDIAN/);
	$big_endian=0 if (/\-DL_ENDIAN/);
}
if (!defined($big_endian)) {
	# 'N' packs big-endian, 'L' unpacks native: equal only on BE hosts.
	$big_endian=(unpack('L',pack('N',1))==1);
}

# offsets of the Most and Least Significant Bytes
$MSB=$big_endian?0:3;
$LSB=3&~$MSB;

# Sixteen registers holding the message schedule window.
@X=(	"\$8",	"\$9",	"\$10",	"\$11",	"\$12",	"\$13",	"\$14",	"\$15",
	"\$16",	"\$17",	"\$18",	"\$19",	"\$20",	"\$21",	"\$22",	"\$23");
$ctx="\$4";	# a0
$inp="\$5";	# a1
$num="\$6";	# a2
$A="\$1";
$B="\$2";
$C="\$3";
$D="\$7";
$E="\$24";	@V=($A,$B,$C,$D,$E);
$t0="\$25";	# jp,t9
$t1="\$28";	# gp
$t2="\$30";	# fp,s8
$K="\$31";	# ra

$FRAMESIZE=16;

# BSWAP($i): append code that reverses the byte order of @X[$i] in place,
# using $t0, $t1 and $t2 as scratch.  Callers invoke it only for
# little-endian targets.
sub BSWAP {
my ($i)=@_;
$code.=<<___;
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	@X[$i],$t1
	or	@X[$i],$t2
___
}

# BODY_00_14($i,$a,$b,$c,$d,$e): append code for round $i (0..14).
# $a..$e are the register names currently playing the roles of the five
# working variables.  The emitted code:
#   - byte-swaps @X[$i] first when targeting little-endian;
#   - loads input word $i+1 into @X[$i+1] with an lwl/lwr pair;
#   - adds $K, ($a<<5 | $a>>27), ((($c^$d)&$b)^$d) and @X[$i] to $e;
#   - replaces $b with ($b<<30 | $b>>2).
sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
BSWAP($i) if (!$big_endian);
$code.=<<___;
	lwl	@X[$j],$j*4+$MSB($inp)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	lwr	@X[$j],$j*4+$LSB($inp)
	srl	$t1,$a,27
	addu	$e,$t0
	xor	$t0,$c,$d
	addu	$e,$t1
	sll	$t2,$b,30
	and	$t0,$b
	srl	$b,$b,2
	xor	$t0,$d
	addu	$e,@X[$i]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_15_19($i,$a,$b,$c,$d,$e): append code for round $i (15..19).
# Same round arithmetic as BODY_00_14, but instead of loading input it
# computes the next schedule word in place: with $j=$i+1,
# @X[$j%16] ^= @X[($j+2)%16] ^ @X[($j+8)%16] ^ @X[($j+13)%16], then the
# result is rotated left by one bit.  Only word 15 still needs the
# little-endian byte swap here; words 0..14 were swapped in BODY_00_14.
sub BODY_15_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;

BSWAP($i) if (!$big_endian && $i==15);
$code.=<<___;
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	and	$t0,$b
	srl	$t1,@X[$j%16],31
	addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	xor	$t0,$d
	or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_20_39($i,$a,$b,$c,$d,$e): append code for round $i; the driver
# below uses it for rounds 20..39 and again for 60..79.  The mixing term
# added to $e is $b^$c^$d.  For $i<79 the next schedule word is updated
# as in BODY_15_19.  For $i==79 no further schedule word is needed, so
# those slots are used to load the five context words from $ctx into
# @X[0..4] instead.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	xor	$t0,$b
	srl	$t1,@X[$j%16],31
	addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	@X[$j%16],$t1
	or	$b,$t2
	addu	$e,$t0
___
$code.=<<___ if ($i==79);
	lw	@X[0],0($ctx)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	lw	@X[1],4($ctx)
	srl	$t1,$a,27
	addu	$e,$t0
	lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	lw	@X[3],12($ctx)
	sll	$t2,$b,30
	xor	$t0,$b
	lw	@X[4],16($ctx)
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_40_59($i,$a,$b,$c,$d,$e): append code for round $i (40..59).
# The mixing term is added to $e in two parts, ($c&$d) and (($c^$d)&$b);
# the schedule update matches BODY_15_19.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	addu	$e,$t0
	srl	$t1,@X[$j%16],31
	xor	$t0,$c,$d
	addu	@X[$j%16],@X[$j%16]
	and	$t0,$b
	srl	$b,$b,2
	or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# Function prologue: allocate the frame, save $31, $30, $28 and $16..$23
# (the registers named in .mask), load the five context words into
# $A..$E and start the per-block loop by loading input word 0 and the
# first round constant.
$code=<<___;
#include <asm.h>
#include <regdef.h>

.text

.set	noat
.set	noreorder
.align	5
.globl	sha1_block_data_order
.ent	sha1_block_data_order
sha1_block_data_order:
	.frame	sp,$FRAMESIZE*SZREG,zero
	.mask	0xd0ff0000,-$FRAMESIZE*SZREG
	.set	noreorder
	PTR_SUB	sp,$FRAMESIZE*SZREG
	REG_S	\$31,($FRAMESIZE-1)*SZREG(sp)
	REG_S	\$30,($FRAMESIZE-2)*SZREG(sp)
	REG_S	\$28,($FRAMESIZE-3)*SZREG(sp)
	REG_S	\$23,($FRAMESIZE-4)*SZREG(sp)
	REG_S	\$22,($FRAMESIZE-5)*SZREG(sp)
	REG_S	\$21,($FRAMESIZE-6)*SZREG(sp)
	REG_S	\$20,($FRAMESIZE-7)*SZREG(sp)
	REG_S	\$19,($FRAMESIZE-8)*SZREG(sp)
	REG_S	\$18,($FRAMESIZE-9)*SZREG(sp)
	REG_S	\$17,($FRAMESIZE-10)*SZREG(sp)
	REG_S	\$16,($FRAMESIZE-11)*SZREG(sp)

	lw	$A,0($ctx)
	lw	$B,4($ctx)
	lw	$C,8($ctx)
	lw	$D,12($ctx)
	b	.Loop
	lw	$E,16($ctx)
.align	4
.Loop:
	.set	reorder
	lwl	@X[0],$MSB($inp)
	lui	$K,0x5a82
	lwr	@X[0],$LSB($inp)
	ori	$K,0x7999	# K_00_19
___
# Emit the 80 rounds.  After each round @V is rotated right by one so
# the register that was $e becomes the next round's $a; 80 rotations of
# a 5-element list bring it back to ($A,$B,$C,$D,$E).  $i is shared by
# all five loops on purpose: each loop continues where the previous one
# stopped.
for ($i=0;$i<15;$i++)	{ &BODY_00_14($i,@V); unshift(@V,pop(@V)); }
for (;$i<20;$i++)	{ &BODY_15_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x6ed9
	ori	$K,0xeba1	# K_20_39
___
for (;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x8f1b
	ori	$K,0xbcdc	# K_40_59
___
for (;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0xca62
	ori	$K,0xc1d6	# K_60_79
___
for (;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# Loop tail and epilogue: add the context words that round 79 loaded
# into @X[0..4], store the result back, advance $inp by 64 bytes while
# $num is non-zero, then restore the saved registers and return.
$code.=<<___;
	addu	$A,$X[0]
	addu	$B,$X[1]
	sw	$A,0($ctx)
	addu	$C,$X[2]
	addu	$D,$X[3]
	sw	$B,4($ctx)
	addu	$E,$X[4]
	PTR_SUB	$num,1
	sw	$C,8($ctx)
	sw	$D,12($ctx)
	sw	$E,16($ctx)
	.set	noreorder
	bnez	$num,.Loop
	PTR_ADD	$inp,64

	.set	noreorder
	REG_L	\$31,($FRAMESIZE-1)*SZREG(sp)
	REG_L	\$30,($FRAMESIZE-2)*SZREG(sp)
	REG_L	\$28,($FRAMESIZE-3)*SZREG(sp)
	REG_L	\$23,($FRAMESIZE-4)*SZREG(sp)
	REG_L	\$22,($FRAMESIZE-5)*SZREG(sp)
	REG_L	\$21,($FRAMESIZE-6)*SZREG(sp)
	REG_L	\$20,($FRAMESIZE-7)*SZREG(sp)
	REG_L	\$19,($FRAMESIZE-8)*SZREG(sp)
	REG_L	\$18,($FRAMESIZE-9)*SZREG(sp)
	REG_L	\$17,($FRAMESIZE-10)*SZREG(sp)
	REG_L	\$16,($FRAMESIZE-11)*SZREG(sp)
	jr	ra
	PTR_ADD	sp,$FRAMESIZE*SZREG
.end	sha1_block_data_order
___
print $code;
# Buffered output is flushed at close; a failed write (e.g. full disk)
# is reported only here, so it must not be ignored or the build would
# go on with a truncated assembly file.
close STDOUT or die "error closing STDOUT: $!";