1#!/usr/bin/env perl 2# 3# ==================================================================== 4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 5# project. The module is, however, dual licensed under OpenSSL and 6# CRYPTOGAMS licenses depending on where you obtain it. For further 7# details see http://www.openssl.org/~appro/cryptogams/. 8# ==================================================================== 9# 10# April 2010 11# 12# The module implements "4-bit" GCM GHASH function and underlying 13# single multiplication operation in GF(2^128). "4-bit" means that it 14# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC 15# it processes one byte in 19.6 cycles, which is more than twice as 16# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for 17# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per 18# processed byte. This is ~2.2x faster than 64-bit code generated by 19# vendor compiler (which used to be very hard to beat:-). 20# 21# Special thanks to polarhome.com for providing HP-UX account. 

# Command line: <flavour> [<output file>].
# A flavour containing "64" selects the 64-bit (PA-RISC 2.0W) build; anything
# else selects the 32-bit build.
$flavour = shift;
$output  = shift;

# Redirect STDOUT only when an output file was actually given, and fail loudly
# if it cannot be created: every line of generated assembly goes through
# STDOUT, so a silently failed open would lose the whole module.
if (defined $output && length $output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# ABI-dependent parameters: assembler level, pointer size, stack frame
# layout, the store/load mnemonics used by the prologue/epilogue and the
# highest callee-saved register that gets spilled.
if ($flavour =~ /64/) {
    $LEVEL        ="2.0W";
    $SIZE_T       =8;
    $FRAME_MARKER =80;
    $SAVED_RP     =16;
    $PUSH         ="std";
    $PUSHMA       ="std,ma";
    $POP          ="ldd";
    $POPMB        ="ldd,mb";
    $NREGS        =6;
} else {
    $LEVEL        ="1.0";    #"\n\t.ALLOW\t2.0";
    $SIZE_T       =4;
    $FRAME_MARKER =48;
    $SAVED_RP     =20;
    $PUSH         ="stw";
    $PUSHMA       ="stwm";
    $POP          ="ldw";
    $POPMB        ="ldwm";
    $NREGS        =11;
}

$FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker
                                #                 [+ argument transfer]

################# volatile registers
$Xi="%r26";     # argument block
$Htbl="%r25";
$inp="%r24";
$len="%r23";
$Hhh=$Htbl;     # variables
$Hll="%r22";
$Zhh="%r21";
$Zll="%r20";
$cnt="%r19";
$rem_4bit="%r28";
$rem="%r29";
$mask0xf0="%r31";

################# preserved registers
$Thh="%r1";
$Tll="%r2";
$nlo="%r3";
$nhi="%r4";
$byte="%r5";
if ($SIZE_T==4) {
    # The 32-bit code keeps every 128-bit quantity in four 32-bit registers,
    # hence the extra "hl"/"lh" parts.
    $Zhl="%r6";
    $Zlh="%r7";
    $Hhl="%r8";
    $Hlh="%r9";
    $Thl="%r10";
    $Tlh="%r11";
}
$rem2="%r6";    # used in PA-RISC 2.0 code

# Text in `...` inside the templates below is an arithmetic expression that
# the output loop at the end of the file evaluates when printing.

# gcm_gmult_4bit: section header, entry point and the part of the prologue
# common to both flavours (saves %r2 and %r3-%r6).
$code.=<<___;
	.LEVEL	$LEVEL
#if 0
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
#else
	.text
#endif

	.EXPORT	gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
	.ALIGN	64
gcm_gmult_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
# The 32-bit flavour additionally saves %r7-%r11.
$code.=<<___ if ($SIZE_T==4);
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# Position-independent computation of the rem_4bit table address, plus the
# 0xf0 nibble mask.
$code.=<<___;
	blr	%r0,$rem_4bit
	ldi	3,$rem
L\$pic_gmult
	andcm	$rem_4bit,$rem,$rem_4bit
	addl	$inp,$len,$len
	ldo	L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
	ldi	0xf0,$mask0xf0
___
# 32-bit flavour only: run-time selection between the PA-RISC 2.0 code that
# follows and the PA-RISC 1.0 code at L$parisc1_gmult. The matching #endif is
# emitted together with that label.
$code.=<<___ if ($SIZE_T==4);
#ifndef __OpenBSD__
	ldi	31,$rem
	mtctl	$rem,%cr11
	extrd,u,*=	$rem,%sar,1,$rem	; executes on PA-RISC 1.0
	b	L\$parisc1_gmult
	nop
___

# PA-RISC 2.0 code path of gcm_gmult_4bit (64-bit loads and shifts).
$code.=<<___;
	ldb	15($Xi),$nlo
	ldo	8($Htbl),$Hll

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	ldd	$nlo($Hll),$Zll
	ldd	$nlo($Hhh),$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldb	14($Xi),$nlo

	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem
	b	L\$oop_gmult_pa2
	ldi	13,$cnt

	.ALIGN	8
L\$oop_gmult_pa2
	xor	$rem,$Zhh,$Zhh		; moved here to work around gas bug
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem
	ldbx	$cnt($Xi),$nlo

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo
	ldd	$rem($rem_4bit),$rem

	xor	$Tll,$Zll,$Zll
	addib,uv	-1,$cnt,L\$oop_gmult_pa2
	xor	$Thh,$Zhh,$Zhh

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	std	$Zll,8($Xi)
	std	$Zhh,0($Xi)
___

# 32-bit flavour only: skip from the PA-RISC 2.0 path to the epilogue, then
# the PA-RISC 1.0 code path of gcm_gmult_4bit (32-bit loads and shifts).
# The #endif closes the #ifndef __OpenBSD__ emitted before the 2.0 path.
$code.=<<___ if ($SIZE_T==4);
	b	L\$done_gmult
	nop

L\$parisc1_gmult
#endif
	ldb	15($Xi),$nlo
	ldo	12($Htbl),$Hll
	ldo	8($Htbl),$Hlh
	ldo	4($Htbl),$Hhl

	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	ldwx	$nlo($Hll),$Zll
	ldwx	$nlo($Hlh),$Zlh
	ldwx	$nlo($Hhl),$Zhl
	ldwx	$nlo($Hhh),$Zhh
	zdep	$Zll,28,4,$rem
	ldb	14($Xi),$nlo
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldwx	$nhi($Hhl),$Thl
	extru	$Zhh,27,28,$Zhh
	ldwx	$nhi($Hhh),$Thh
	xor	$rem,$Zhh,$Zhh
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$Thl,$Zhl,$Zhl
	b	L\$oop_gmult_pa1
	ldi	13,$cnt

	.ALIGN	8
L\$oop_gmult_pa1
	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldbx	$cnt($Xi),$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$rem,$Zhh,$Zhh
	zdep	$Zll,28,4,$rem
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	and	$mask0xf0,$nlo,$nhi
	extru	$Zhh,27,28,$Zhh
	zdep	$nlo,27,4,$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$rem,$Zhh,$Zhh
	addib,uv	-1,$cnt,L\$oop_gmult_pa1
	xor	$Thl,$Zhl,$Zhl

	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$rem,$Zhh,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	zdep	$Zll,28,4,$rem
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	extru	$Zhh,27,28,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Tlh,$Zlh,$Zlh
	xor	$rem,$Zhh,$Zhh
	stw	$Zll,12($Xi)
	xor	$Thl,$Zhl,$Zhl
	stw	$Zlh,8($Xi)
	xor	$Thh,$Zhh,$Zhh
	stw	$Zhl,4($Xi)
	stw	$Zhh,0($Xi)
___
# gcm_gmult_4bit epilogue: restore %r2 and %r4-%r6 ...
$code.=<<___;
L\$done_gmult
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
___
# ... plus %r7-%r11 in the 32-bit flavour ...
$code.=<<___ if ($SIZE_T==4);
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
___
# ... then return, popping the frame and %r3 after the branch instruction.
# gcm_ghash_4bit follows. Its ENTRY_GR uses $NREGS, exactly like
# gcm_gmult_4bit, so that the declaration matches the registers its prologue
# really saves (%r3-%r6 in the 64-bit flavour, %r3-%r11 in the 32-bit one).
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.EXPORT	gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
	.ALIGN	64
gcm_ghash_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
# The 32-bit flavour additionally saves %r7-%r11.
$code.=<<___ if ($SIZE_T==4);
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# Position-independent rem_4bit address, end-of-input pointer ($inp+$len is
# left in $len) and the 0xf0 nibble mask.
$code.=<<___;
	blr	%r0,$rem_4bit
	ldi	3,$rem
L\$pic_ghash
	andcm	$rem_4bit,$rem,$rem_4bit
	addl	$inp,$len,$len
	ldo	L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
	ldi	0xf0,$mask0xf0
___
# 32-bit flavour only: run-time selection between the PA-RISC 2.0 code that
# follows and the PA-RISC 1.0 code at L$parisc1_ghash.
$code.=<<___ if ($SIZE_T==4);
#ifndef __OpenBSD__
	ldi	31,$rem
	mtctl	$rem,%cr11
	extrd,u,*=	$rem,%sar,1,$rem	; executes on PA-RISC 1.0
	b	L\$parisc1_ghash
	nop
___

# PA-RISC 2.0 code path of gcm_ghash_4bit; the outer loop advances $inp by
# 16 bytes per iteration until it reaches $len.
$code.=<<___;
	ldb	15($Xi),$nlo
	ldo	8($Htbl),$Hll

L\$outer_ghash_pa2
	ldb	15($inp),$nhi
	xor	$nhi,$nlo,$nlo
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	ldd	$nlo($Hll),$Zll
	ldd	$nlo($Hhh),$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldb	14($Xi),$nlo
	ldb	14($inp),$byte

	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh
	xor	$byte,$nlo,$nlo
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem
	b	L\$oop_ghash_pa2
	ldi	13,$cnt

	.ALIGN	8
L\$oop_ghash_pa2
	xor	$rem,$Zhh,$Zhh		; moved here to work around gas bug
	depd,z	$Zll,60,4,$rem2

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldbx	$cnt($Xi),$nlo
	ldbx	$cnt($inp),$byte

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	ldd	$rem2($rem_4bit),$rem2

	xor	$rem2,$Zhh,$Zhh
	xor	$byte,$nlo,$nlo
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	extrd,u	$Zhh,59,60,$Zhh
	xor	$Tll,$Zll,$Zll

	ldd	$rem($rem_4bit),$rem
	addib,uv	-1,$cnt,L\$oop_ghash_pa2
	xor	$Thh,$Zhh,$Zhh

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem2

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	ldd	$rem2($rem_4bit),$rem2

	xor	$rem2,$Zhh,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	extrd,u	$Zhh,59,60,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	std	$Zll,8($Xi)
	ldo	16($inp),$inp
	std	$Zhh,0($Xi)
	cmpb,*<>	$inp,$len,L\$outer_ghash_pa2
	copy	$Zll,$nlo
___

# 32-bit flavour only: skip from the PA-RISC 2.0 path to the epilogue, then
# the PA-RISC 1.0 code path of gcm_ghash_4bit.
$code.=<<___ if ($SIZE_T==4);
	b	L\$done_ghash
	nop

L\$parisc1_ghash
#endif
	ldb	15($Xi),$nlo
	ldo	12($Htbl),$Hll
	ldo	8($Htbl),$Hlh
	ldo	4($Htbl),$Hhl

L\$outer_ghash_pa1
	ldb	15($inp),$byte
	xor	$byte,$nlo,$nlo
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	ldwx	$nlo($Hll),$Zll
	ldwx	$nlo($Hlh),$Zlh
	ldwx	$nlo($Hhl),$Zhl
	ldwx	$nlo($Hhh),$Zhh
	zdep	$Zll,28,4,$rem
	ldb	14($Xi),$nlo
	ldb	14($inp),$byte
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldwx	$nhi($Hhl),$Thl
	extru	$Zhh,27,28,$Zhh
	ldwx	$nhi($Hhh),$Thh
	xor	$byte,$nlo,$nlo
	xor	$rem,$Zhh,$Zhh
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$Thl,$Zhl,$Zhl
	b	L\$oop_ghash_pa1
	ldi	13,$cnt

	.ALIGN	8
L\$oop_ghash_pa1
	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldbx	$cnt($Xi),$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldbx	$cnt($inp),$byte
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$rem,$Zhh,$Zhh
	zdep	$Zll,28,4,$rem
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$byte,$nlo,$nlo
	shrpw	$Zhh,$Zhl,4,$Zhl
	and	$mask0xf0,$nlo,$nhi
	extru	$Zhh,27,28,$Zhh
	zdep	$nlo,27,4,$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$rem,$Zhh,$Zhh
	addib,uv	-1,$cnt,L\$oop_ghash_pa1
	xor	$Thl,$Zhl,$Zhl

	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$rem,$Zhh,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	zdep	$Zll,28,4,$rem
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	extru	$Zhh,27,28,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Tlh,$Zlh,$Zlh
	xor	$rem,$Zhh,$Zhh
	stw	$Zll,12($Xi)
	xor	$Thl,$Zhl,$Zhl
	stw	$Zlh,8($Xi)
	xor	$Thh,$Zhh,$Zhh
	stw	$Zhl,4($Xi)
	ldo	16($inp),$inp
	stw	$Zhh,0($Xi)
	comb,<>	$inp,$len,L\$outer_ghash_pa1
	copy	$Zll,$nlo
___
# gcm_ghash_4bit epilogue: restore %r2 and %r4-%r6 ...
$code.=<<___;
L\$done_ghash
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
___
# ... plus %r7-%r11 in the 32-bit flavour ...
$code.=<<___ if ($SIZE_T==4);
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
___
# ... then return. The shared rem_4bit table (16 entries of two words each,
# the `...<<16` expressions are evaluated by the output loop) and the
# identification string follow.
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.ALIGN	64
L\$rem_4bit
	.WORD	`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
	.WORD	`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
	.WORD	`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
	.WORD	`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0

	.data
	.STRINGZ "GHASH for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
	.ALIGN	64
___

# Explicitly encode PA-RISC 2.0 instructions used in this module, so
# that it can be compiled with .LEVEL 1.0. It should be noted that I
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
# directive...
#
# Every encoder below takes ($mod, $args): the completer text that directly
# follows the mnemonic (e.g. ",u" or "") and the operand string. When the
# operands match a format the encoder knows, it returns a line of the form
# "\t.WORD\t0x<opcode>\t; <original instruction>"; otherwise it returns the
# original instruction text, tab-indented, for the assembler to handle.
# Operand captures are copied into named variables right after the match so
# that later pattern matches cannot clobber them.

# ldd: register-indexed (format 4) and short-displacement (format 5) forms.
my $ldd = sub {
    my ($mod, $args) = @_;
    my $orig = "ldd$mod\t$args";

    if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {          # format 4
        my ($index, $base, $target) = ($1, $2, $3);
        my $opcode = (0x03<<26)|($base<<21)|($index<<16)|(3<<6)|$target;
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }
    if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {         # format 5
        my ($disp, $base, $target) = ($1, $2, $3);
        my $opcode = (0x03<<26)|($base<<21)|(1<<12)|(3<<6)|$target;
        $opcode |= (($disp&0xF)<<17)|(($disp&0x10)<<12);           # encode offset
        $opcode |= (1<<5)  if ($mod =~ /^,m/);
        $opcode |= (1<<13) if ($mod =~ /^,mb/);
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }
    return "\t".$orig;
};

# std: register stored at displacement(base).
my $std = sub {
    my ($mod, $args) = @_;
    my $orig = "std$mod\t$args";

    if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) {         # format 3 suffices
        my ($source, $disp, $base) = ($1, $2, $3);
        my $opcode = (0x1c<<26)|($base<<21)|($source<<16)
                   | (($disp&0x1FF8)<<1)|(($disp>>13)&1);
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }
    return "\t".$orig;
};

# extrd: fixed-position (format 15) and %sar-position (format 12) forms.
my $extrd = sub {
    my ($mod, $args) = @_;
    my $orig = "extrd$mod\t$args";

    # I only have ",u" completer, it's implicitly encoded...
    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {      # format 15
        my ($source, $pos, $width, $target) = ($1, $2, $3, $4);
        my $opcode = (0x36<<26)|($source<<21)|($target<<16);
        my $clen   = 32-$width;
        $opcode |= (($pos&0x20)<<6)|(($pos&0x1f)<<5);              # encode pos
        $opcode |= (($clen&0x20)<<7)|($clen&0x1f);                 # encode len
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }
    if ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {          # format 12
        my ($source, $width, $target) = ($1, $2, $3);
        my $opcode = (0x34<<26)|($source<<21)|($target<<16)|(2<<11)|(1<<9);
        my $clen   = 32-$width;
        $opcode |= (($clen&0x20)<<3)|($clen&0x1f);                 # encode len
        $opcode |= (1<<13) if ($mod =~ /,\**=/);
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }
    return "\t".$orig;
};

# shrpd: fixed shift amount (format 14) and %sar shift amount (format 11).
my $shrpd = sub {
    my ($mod, $args) = @_;
    my $orig = "shrpd$mod\t$args";

    if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {    # format 14
        my ($high, $low, $shift, $target) = ($1, $2, $3, $4);
        my $opcode = (0x34<<26)|($low<<21)|($high<<16)|(1<<10)|$target;
        my $cpos   = 63-$shift;
        $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);            # encode sa
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }
    if ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {        # format 11
        my ($high, $low, $target) = ($1, $2, $3);
        my $opcode = (0x34<<26)|($low<<21)|($high<<16)|(1<<9)|$target;
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }
    return "\t".$orig;
};

# depd: fixed-position form (format 16).
my $depd = sub {
    my ($mod, $args) = @_;
    my $orig = "depd$mod\t$args";

    # I only have ",z" completer, it's implicitly encoded...
    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {      # format 16
        my ($source, $pos, $width, $target) = ($1, $2, $3, $4);
        my $opcode = (0x3c<<26)|($target<<21)|($source<<16);
        my $cpos   = 63-$pos;
        my $clen   = 32-$width;
        $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);            # encode pos
        $opcode |= (($clen&0x20)<<7)|($clen&0x1f);                 # encode len
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }
    return "\t".$orig;
};

# Mnemonic -> encoder. A plain lookup table, so that assemble() does not have
# to string-eval a variable name for every line of the template.
my %encoder_for = (
    ldd   => $ldd,
    std   => $std,
    extrd => $extrd,
    shrpd => $shrpd,
    depd  => $depd,
);

# assemble($mnemonic, $mod, $args)
# Returns the text to emit for one instruction: the hand-encoded .WORD line
# when an encoder exists for $mnemonic, otherwise the instruction itself as
# "\t$mnemonic$mod\t$args".
sub assemble {
    my ($mnemonic, $mod, $args) = @_;
    my $encoder = $encoder_for{$mnemonic};

    return $encoder->($mod, $args) if ref($encoder) eq 'CODE';
    return "\t$mnemonic$mod\t$args";
}

# Post-process the template line by line and print the result.
foreach (split("\n",$code)) {
    # Evaluate the arithmetic written between backticks in the template.
    s/\`([^\`]*)\`/eval $1/ge;
    if ($SIZE_T==4) {
        # 32-bit build: hand-encode the instructions listed in %encoder_for,
        # rename cmpb,* to comb, and drop the "*" from ",*" conditions.
        s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
        s/cmpb,\*/comb,/;
        s/,\*/,/;
    }
    s/\bbv\b/bve/ if ($SIZE_T==8);     # 64-bit build returns with bve
    print $_,"\n";
}

# Buffered write errors only surface at close, so check it.
close STDOUT or die "error closing STDOUT: $!";