#! /usr/bin/env perl
# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for MIPS.

# Performance improvement is 30% on unaligned input. The "secret" is
# to deploy the lwl/lwr pair to load unaligned input. One could have
# vectorized Xupdate on MIPSIII/IV, but the goal was to code a MIPS32-
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...
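#
# Illustrative note (nothing here is emitted by this script): on a
# big-endian CPU the lwl/lwr idiom used below loads one 32-bit word
# from a possibly unaligned address in two instructions, e.g.
#
#	lwl	$t0,0($a1)	# bytes from the most-significant end
#	lwr	$t0,3($a1)	# remaining least-significant bytes
#
# Each instruction touches only the bytes inside the aligned word that
# contains its effective address, so the pair never takes an alignment
# trap; the $MSB/$LSB offsets defined below swap the two displacements
# on little-endian processors.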
# September 2012.
#
# Add MIPS32r2 code (>25% fewer instructions).

######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to the NUBI register layout:
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. The following coding rules
# facilitate interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy the return value to $t0, former $v0 [or to $a0 if you're
#   adapting old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference, here is the register layout for the N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
$flavour = shift || "o32";	# supported flavours are o32,n32,64,nubi32,nubi64

if ($flavour =~ /64|n32/i) {
	$PTR_ADD="dadd";	# incidentally works even on n32
	$PTR_SUB="dsub";	# incidentally works even on n32
	$REG_S="sd";
	$REG_L="ld";
	$PTR_SLL="dsll";	# incidentally works even on n32
	$SZREG=8;
} else {
	$PTR_ADD="add";
	$PTR_SUB="sub";
	$REG_S="sw";
	$REG_L="lw";
	$PTR_SLL="sll";
	$SZREG=4;
}
#
# <appro@openssl.org>
#
######################################################################

$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});

for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); }
open STDOUT,">$output" or die "can't open $output: $!";

if (!defined($big_endian))
	{ $big_endian=(unpack('L',pack('N',1))==1); }

# offsets of the Most and Least Significant Bytes
$MSB=$big_endian?0:3;
$LSB=3&~$MSB;

@X=map("\$$_",(8..23));	# a4-a7,s0-s11

$ctx=$a0;
$inp=$a1;
$num=$a2;
$A="\$1";
$B="\$2";
$C="\$3";
$D="\$7";
$E="\$24";	@V=($A,$B,$C,$D,$E);
$t0="\$25";
$t1=$num;	# $num is offloaded to stack
$t2="\$30";	# fp
$K="\$31";	# ra

sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if (!$big_endian);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	@X[$i],@X[$i]	# byte swap($i)
	rotr	@X[$i],@X[$i],16
#else
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	$t1,$t2
	or	@X[$i],$t1
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K		# $i
	xor	$t0,$c,$d
	rotr	$t1,$a,27
	lwl	@X[$j],$j*4+$MSB($inp)
	and	$t0,$b
	addu	$e,$t1
	lwr	@X[$j],$j*4+$LSB($inp)
	xor	$t0,$d
	addu	$e,@X[$i]
	rotr	$b,$b,2
	addu	$e,$t0
#else
	lwl	@X[$j],$j*4+$MSB($inp)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	lwr	@X[$j],$j*4+$LSB($inp)
	srl	$t1,$a,27
	addu	$e,$t0
	xor	$t0,$c,$d
	addu	$e,$t1
	sll	$t2,$b,30
	and	$t0,$b
	srl	$b,$b,2
	xor	$t0,$d
	addu	$e,@X[$i]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}
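
# Two notes on BODY_00_14 above (illustrative commentary, nothing
# emitted here):
#
# - The sequence "xor $t0,$c,$d; and $t0,$b; xor $t0,$d" computes the
#   round function for rounds 0..19,
#
#	Ch(b,c,d) = (b & c) | (~b & d) = d ^ (b & (c ^ d)),
#
#   which saves the 'not' (and an extra temporary) that the textbook
#   formula would require.
#
# - On little-endian MIPS32r2/MIPS64r2, "wsbh" swaps the bytes within
#   each halfword and "rotr ...,16" then swaps the halfwords, so the
#   pair performs a full 32-bit byte reversal in two instructions.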

sub BODY_15_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;

$code.=<<___ if (!$big_endian && $i==15);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	@X[$i],@X[$i]	# byte swap($i)
	rotr	@X[$i],@X[$i],16
#else
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	@X[$i],$t1
	or	@X[$i],$t2
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K		# $i
	xor	@X[$j%16],@X[($j+2)%16]
	xor	$t0,$c,$d
	rotr	$t1,$a,27
	xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$b
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	xor	$t0,$d
	addu	$e,@X[$i%16]
	rotr	@X[$j%16],@X[$j%16],31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	and	$t0,$b
	srl	$t1,@X[$j%16],31
	addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	xor	$t0,$d
	or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	xor	@X[$j%16],@X[($j+2)%16]
	addu	$e,$K		# $i
	rotr	$t1,$a,27
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	xor	$t0,$b
	addu	$e,@X[$i%16]
	rotr	@X[$j%16],@X[$j%16],31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	xor	$t0,$b
	srl	$t1,@X[$j%16],31
	addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	@X[$j%16],$t1
	or	$b,$t2
	addu	$e,$t0
#endif
___
$code.=<<___ if ($i==79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	lw	@X[0],0($ctx)
	addu	$e,$K		# $i
	lw	@X[1],4($ctx)
	rotr	$t1,$a,27
	lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	lw	@X[3],12($ctx)
	xor	$t0,$b
	addu	$e,@X[$i%16]
	lw	@X[4],16($ctx)
	rotr	$b,$b,2
	addu	$e,$t0
#else
	lw	@X[0],0($ctx)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	lw	@X[1],4($ctx)
	srl	$t1,$a,27
	addu	$e,$t0
	lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	lw	@X[3],12($ctx)
	sll	$t2,$b,30
	xor	$t0,$b
	lw	@X[4],16($ctx)
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K		# $i
	and	$t0,$c,$d
	xor	@X[$j%16],@X[($j+2)%16]
	rotr	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	and	$t0,$b
	addu	$e,@X[$i%16]
	rotr	@X[$j%16],@X[$j%16],31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	addu	$e,$t0
	srl	$t1,@X[$j%16],31
	xor	$t0,$c,$d
	addu	@X[$j%16],@X[$j%16]
	and	$t0,$b
	srl	$b,$b,2
	or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}
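
# Two tricks in the round bodies above are worth spelling out
# (illustrative commentary, nothing emitted here):
#
# - Rounds 40..59 need Maj(b,c,d) = (b&c)|(b&d)|(c&d). It is split
#   into the two terms (c&d) and (b&(c^d)), which are accumulated into
#   $e with two separate addu instructions: for any given bit, c&d and
#   b&(c^d) are never both set (c&d=1 forces c^d=0), so the sum equals
#   the bitwise OR and adds exactly Maj(b,c,d). Rounds 20..39 and
#   60..79 use the plain parity function b^c^d.
#
# - The message schedule X[j] = ROTL1(X[j-3]^X[j-8]^X[j-14]^X[j-16])
#   is a single "rotr ...,31" on MIPS32r2; the pre-R2 path synthesizes
#   the rotate as "srl $t1,x,31; addu x,x; or x,$t1", with addu
#   standing in for a left shift by 1.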
"0xc0fff008" : "0xc0ff0000"; 337 338$code=<<___; 339#ifdef OPENSSL_FIPSCANISTER 340# include <openssl/fipssyms.h> 341#endif 342 343#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2) 344#define _MIPS_ARCH_MIPS32R2 345#endif 346 347.text 348 349.set noat 350.set noreorder 351.align 5 352.globl sha1_block_data_order 353.ent sha1_block_data_order 354sha1_block_data_order: 355 .frame $sp,$FRAMESIZE*$SZREG,$ra 356 .mask $SAVED_REGS_MASK,-$SZREG 357 .set noreorder 358 $PTR_SUB $sp,$FRAMESIZE*$SZREG 359 $REG_S $ra,($FRAMESIZE-1)*$SZREG($sp) 360 $REG_S $fp,($FRAMESIZE-2)*$SZREG($sp) 361 $REG_S $s11,($FRAMESIZE-3)*$SZREG($sp) 362 $REG_S $s10,($FRAMESIZE-4)*$SZREG($sp) 363 $REG_S $s9,($FRAMESIZE-5)*$SZREG($sp) 364 $REG_S $s8,($FRAMESIZE-6)*$SZREG($sp) 365 $REG_S $s7,($FRAMESIZE-7)*$SZREG($sp) 366 $REG_S $s6,($FRAMESIZE-8)*$SZREG($sp) 367 $REG_S $s5,($FRAMESIZE-9)*$SZREG($sp) 368 $REG_S $s4,($FRAMESIZE-10)*$SZREG($sp) 369___ 370$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue 371 $REG_S $s3,($FRAMESIZE-11)*$SZREG($sp) 372 $REG_S $s2,($FRAMESIZE-12)*$SZREG($sp) 373 $REG_S $s1,($FRAMESIZE-13)*$SZREG($sp) 374 $REG_S $s0,($FRAMESIZE-14)*$SZREG($sp) 375 $REG_S $gp,($FRAMESIZE-15)*$SZREG($sp) 376___ 377$code.=<<___; 378 $PTR_SLL $num,6 379 $PTR_ADD $num,$inp 380 $REG_S $num,0($sp) 381 lw $A,0($ctx) 382 lw $B,4($ctx) 383 lw $C,8($ctx) 384 lw $D,12($ctx) 385 b .Loop 386 lw $E,16($ctx) 387.align 4 388.Loop: 389 .set reorder 390 lwl @X[0],$MSB($inp) 391 lui $K,0x5a82 392 lwr @X[0],$LSB($inp) 393 ori $K,0x7999 # K_00_19 394___ 395for ($i=0;$i<15;$i++) { &BODY_00_14($i,@V); unshift(@V,pop(@V)); } 396for (;$i<20;$i++) { &BODY_15_19($i,@V); unshift(@V,pop(@V)); } 397$code.=<<___; 398 lui $K,0x6ed9 399 ori $K,0xeba1 # K_20_39 400___ 401for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } 402$code.=<<___; 403 lui $K,0x8f1b 404 ori $K,0xbcdc # K_40_59 405___ 406for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } 407$code.=<<___; 408 lui $K,0xca62 409 ori $K,0xc1d6 # K_60_79 410___ 411for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } 412$code.=<<___; 413 $PTR_ADD $inp,64 414 $REG_L $num,0($sp) 415 416 addu $A,$X[0] 417 addu $B,$X[1] 418 sw $A,0($ctx) 419 addu $C,$X[2] 420 addu $D,$X[3] 421 sw $B,4($ctx) 422 addu $E,$X[4] 423 sw $C,8($ctx) 424 sw $D,12($ctx) 425 sw $E,16($ctx) 426 .set noreorder 427 bne $inp,$num,.Loop 428 nop 429 430 .set noreorder 431 $REG_L $ra,($FRAMESIZE-1)*$SZREG($sp) 432 $REG_L $fp,($FRAMESIZE-2)*$SZREG($sp) 433 $REG_L $s11,($FRAMESIZE-3)*$SZREG($sp) 434 $REG_L $s10,($FRAMESIZE-4)*$SZREG($sp) 435 $REG_L $s9,($FRAMESIZE-5)*$SZREG($sp) 436 $REG_L $s8,($FRAMESIZE-6)*$SZREG($sp) 437 $REG_L $s7,($FRAMESIZE-7)*$SZREG($sp) 438 $REG_L $s6,($FRAMESIZE-8)*$SZREG($sp) 439 $REG_L $s5,($FRAMESIZE-9)*$SZREG($sp) 440 $REG_L $s4,($FRAMESIZE-10)*$SZREG($sp) 441___ 442$code.=<<___ if ($flavour =~ /nubi/i); 443 $REG_L $s3,($FRAMESIZE-11)*$SZREG($sp) 444 $REG_L $s2,($FRAMESIZE-12)*$SZREG($sp) 445 $REG_L $s1,($FRAMESIZE-13)*$SZREG($sp) 446 $REG_L $s0,($FRAMESIZE-14)*$SZREG($sp) 447 $REG_L $gp,($FRAMESIZE-15)*$SZREG($sp) 448___ 449$code.=<<___; 450 jr $ra 451 $PTR_ADD $sp,$FRAMESIZE*$SZREG 452.end sha1_block_data_order 453.rdata 454.asciiz "SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>" 455___ 456print $code; 457close STDOUT; 458