#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project.
#
# Rights for redistribution and usage in source and binary forms are
# granted according to the OpenSSL license. Warranty of any kind is
# disclaimed.
# ====================================================================


# July 1999
#
# This is a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c.
#
# The module is designed to work with either of the "new" MIPS ABI(5),
# namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
# IRIX 5.x, not only because it doesn't support the new ABIs but also
# because 5.x kernels put the R4x00 CPU into 32-bit mode, and all those
# 64-bit instructions (daddu, dmultu, etc.) found below will only
# cause an illegal instruction exception:-(
#
# In addition the code depends on preprocessor flags set up by the MIPSpro
# compiler driver (either as or cc) and therefore (probably?) can't be
# compiled by the GNU assembler. The GNU C driver manages fine though...
# I mean as long as -mmips-as is specified or is the default option,
# because then it simply invokes /usr/bin/as which in turn takes
# perfect care of the preprocessor definitions. Another neat feature
# offered by the MIPSpro assembler is an optimization pass. This gave
# me the opportunity to have the code looking more regular, as all those
# architecture-dependent instruction rescheduling details were left to
# the assembler. Cool, huh?
#
# Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
# goes way over 3 times faster!
#
#					<appro@fy.chalmers.se>

# October 2010
#
# Adapt the module even for 32-bit ABIs and other OSes. The former was
# achieved by mechanical replacement of 64-bit arithmetic instructions
# such as dmultu, daddu, etc. with their 32-bit counterparts and
# adjusting offsets denoting multiples of BN_ULONG. The above-mentioned
# >3x performance improvement naturally does not apply to 32-bit code
# [because there is no instruction 32-bit compiler can't use]; one
# has to be content with 40-85% improvement depending on benchmark and
# key length, more for longer keys.

# Command line: <flavour> [other args ...] [<output file>]
# The first argument selects the ABI flavour; remaining arguments are
# skipped until one looks like a file name of the form "name.ext".
$flavour = shift;
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect the generated assembly to the output file. When no file name
# was found the script keeps writing to the inherited STDOUT. A failure
# to open a named file is fatal rather than silently ignored.
if ($output) {
	open STDOUT, '>', $output or die "can't open $output: $!";
}

# Pick instruction mnemonics and word sizes for the selected flavour:
#   $LD/$ST           load/store of one BN_ULONG
#   $MULTU..$SLL      arithmetic on BN_ULONG-sized values
#   $BNSZ             size of BN_ULONG in bytes
#   $PTR_ADD/$PTR_SUB pointer arithmetic
#   $SZREG            size of a saved register in bytes
#   $REG_S/$REG_L     register save/restore
if ($flavour =~ /64|n32/i) {
	$LD="ld";
	$ST="sd";
	$MULTU="dmultu";
	$DIVU="ddivu";
	$ADDU="daddu";
	$SUBU="dsubu";
	$SRL="dsrl";
	$SLL="dsll";
	$BNSZ=8;
	$PTR_ADD="daddu";
	$PTR_SUB="dsubu";
	$SZREG=8;
	$REG_S="sd";
	$REG_L="ld";
} else {
	$LD="lw";
	$ST="sw";
	$MULTU="multu";
	$DIVU="divu";
	$ADDU="addu";
	$SUBU="subu";
	$SRL="srl";
	$SLL="sll";
	$BNSZ=4;
	$PTR_ADD="addu";
	$PTR_SUB="subu";
	$SZREG=4;
	$REG_S="sw";
	$REG_L="lw";
	# 32-bit flavours start the output with an ISA level directive.
	$code=".set\tmips2\n";
}

# Below is N32/64 register layout used in the original module.
#
($zero,$at,$v0,$v1)=map("\$$_",(0..3));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
($ta0,$ta1,$ta2,$ta3)=($a4,$a5,$a6,$a7);
#
# No special adaptation is required for O32. NUBI on the other hand
# is treated by saving/restoring ($v1,$t0..$t3).

$gp=$v1 if ($flavour =~ /nubi/i);

$minus4=$v1;

# NUBI only: frame set-up and tear-down shared by every *_internal routine
# below. The prologue stores $ra, $t3..$t0 and $gp (aliased to $v1 above);
# the epilogue reloads $t3..$t0 and $gp and pops the frame ($ra is stored
# but not reloaded). Both strings are empty for every other flavour, so
# appending them is then a no-op.
my $nubi = ($flavour =~ /nubi/i);
my $nubi_prologue = $nubi ? <<___ : "";
	.frame	$sp,6*$SZREG,$ra
	.mask	0x8000f008,-$SZREG
	.set	noreorder
	$PTR_SUB $sp,6*$SZREG
	$REG_S	$ra,5*$SZREG($sp)
	$REG_S	$t3,4*$SZREG($sp)
	$REG_S	$t2,3*$SZREG($sp)
	$REG_S	$t1,2*$SZREG($sp)
	$REG_S	$t0,1*$SZREG($sp)
	$REG_S	$gp,0*$SZREG($sp)
___
my $nubi_epilogue = $nubi ? <<___ : "";
	$REG_L	$t3,4*$SZREG($sp)
	$REG_L	$t2,3*$SZREG($sp)
	$REG_L	$t1,2*$SZREG($sp)
	$REG_L	$t0,1*$SZREG($sp)
	$REG_L	$gp,0*$SZREG($sp)
	$PTR_ADD $sp,6*$SZREG
___

# ---------------------------------------------------------------------
# bn_mul_add_words
#   $a0 = accumulator array, $a1 = source array, $a2 = word count,
#   $a3 = multiplier word.
#   Each word at $a0 becomes the low word of (old + $a1 word * $a3 +
#   carry); the final carry is returned in $v0 (also copied to $a0).
#   The public entry returns 0 when the count is not positive; otherwise
#   the internal routine handles 4 words per loop iteration and up to 3
#   leftover words in the tail.
# ---------------------------------------------------------------------
$code.=<<___;
.rdata
.asciiz	"mips3.s, Version 1.2"
.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>"

.text
.set	noat

.align	5
.globl	bn_mul_add_words
.ent	bn_mul_add_words
bn_mul_add_words:
	.set	noreorder
	bgtz	$a2,bn_mul_add_words_internal
	move	$v0,$zero
	jr	$ra
	move	$a0,$v0
.end	bn_mul_add_words

.align	5
.ent	bn_mul_add_words_internal
bn_mul_add_words_internal:
___
$code.=$nubi_prologue;
$code.=<<___;
	.set	reorder
	li	$minus4,-4
	and	$ta0,$a2,$minus4
	beqz	$ta0,.L_bn_mul_add_words_tail

.L_bn_mul_add_words_loop:
	$LD	$t0,0($a1)
	$MULTU	$t0,$a3
	$LD	$t1,0($a0)
	$LD	$t2,$BNSZ($a1)
	$LD	$t3,$BNSZ($a0)
	$LD	$ta0,2*$BNSZ($a1)
	$LD	$ta1,2*$BNSZ($a0)
	$ADDU	$t1,$v0
	sltu	$v0,$t1,$v0	# All manuals say it "compares 32-bit
				# values", but it seems to work fine
				# even on 64-bit registers.
	mflo	$at
	mfhi	$t0
	$ADDU	$t1,$at
	$ADDU	$v0,$t0
	$MULTU	$t2,$a3
	sltu	$at,$t1,$at
	$ST	$t1,0($a0)
	$ADDU	$v0,$at

	$LD	$ta2,3*$BNSZ($a1)
	$LD	$ta3,3*$BNSZ($a0)
	$ADDU	$t3,$v0
	sltu	$v0,$t3,$v0
	mflo	$at
	mfhi	$t2
	$ADDU	$t3,$at
	$ADDU	$v0,$t2
	$MULTU	$ta0,$a3
	sltu	$at,$t3,$at
	$ST	$t3,$BNSZ($a0)
	$ADDU	$v0,$at

	subu	$a2,4
	$PTR_ADD $a0,4*$BNSZ
	$PTR_ADD $a1,4*$BNSZ
	$ADDU	$ta1,$v0
	sltu	$v0,$ta1,$v0
	mflo	$at
	mfhi	$ta0
	$ADDU	$ta1,$at
	$ADDU	$v0,$ta0
	$MULTU	$ta2,$a3
	sltu	$at,$ta1,$at
	$ST	$ta1,-2*$BNSZ($a0)
	$ADDU	$v0,$at


	and	$ta0,$a2,$minus4
	$ADDU	$ta3,$v0
	sltu	$v0,$ta3,$v0
	mflo	$at
	mfhi	$ta2
	$ADDU	$ta3,$at
	$ADDU	$v0,$ta2
	sltu	$at,$ta3,$at
	$ST	$ta3,-$BNSZ($a0)
	.set	noreorder
	bgtz	$ta0,.L_bn_mul_add_words_loop
	$ADDU	$v0,$at

	beqz	$a2,.L_bn_mul_add_words_return
	nop

.L_bn_mul_add_words_tail:
	.set	reorder
	$LD	$t0,0($a1)
	$MULTU	$t0,$a3
	$LD	$t1,0($a0)
	subu	$a2,1
	$ADDU	$t1,$v0
	sltu	$v0,$t1,$v0
	mflo	$at
	mfhi	$t0
	$ADDU	$t1,$at
	$ADDU	$v0,$t0
	sltu	$at,$t1,$at
	$ST	$t1,0($a0)
	$ADDU	$v0,$at
	beqz	$a2,.L_bn_mul_add_words_return

	$LD	$t0,$BNSZ($a1)
	$MULTU	$t0,$a3
	$LD	$t1,$BNSZ($a0)
	subu	$a2,1
	$ADDU	$t1,$v0
	sltu	$v0,$t1,$v0
	mflo	$at
	mfhi	$t0
	$ADDU	$t1,$at
	$ADDU	$v0,$t0
	sltu	$at,$t1,$at
	$ST	$t1,$BNSZ($a0)
	$ADDU	$v0,$at
	beqz	$a2,.L_bn_mul_add_words_return

	$LD	$t0,2*$BNSZ($a1)
	$MULTU	$t0,$a3
	$LD	$t1,2*$BNSZ($a0)
	$ADDU	$t1,$v0
	sltu	$v0,$t1,$v0
	mflo	$at
	mfhi	$t0
	$ADDU	$t1,$at
	$ADDU	$v0,$t0
	sltu	$at,$t1,$at
	$ST	$t1,2*$BNSZ($a0)
	$ADDU	$v0,$at

.L_bn_mul_add_words_return:
	.set	noreorder
___
$code.=$nubi_epilogue;
$code.=<<___;
	jr	$ra
	move	$a0,$v0
.end	bn_mul_add_words_internal
___

# ---------------------------------------------------------------------
# bn_mul_words
#   $a0 = destination array, $a1 = source array, $a2 = word count,
#   $a3 = multiplier word.
#   Stores the low word of ($a1 word * $a3 + carry) to $a0 and carries
#   the high word forward; the final carry is returned in $v0.
#   Same entry-stub / 4-way loop / tail structure as bn_mul_add_words.
# ---------------------------------------------------------------------
$code.=<<___;

.align	5
.globl	bn_mul_words
.ent	bn_mul_words
bn_mul_words:
	.set	noreorder
	bgtz	$a2,bn_mul_words_internal
	move	$v0,$zero
	jr	$ra
	move	$a0,$v0
.end	bn_mul_words

.align	5
.ent	bn_mul_words_internal
bn_mul_words_internal:
___
$code.=$nubi_prologue;
$code.=<<___;
	.set	reorder
	li	$minus4,-4
	and	$ta0,$a2,$minus4
	beqz	$ta0,.L_bn_mul_words_tail

.L_bn_mul_words_loop:
	$LD	$t0,0($a1)
	$MULTU	$t0,$a3
	$LD	$t2,$BNSZ($a1)
	$LD	$ta0,2*$BNSZ($a1)
	$LD	$ta2,3*$BNSZ($a1)
	mflo	$at
	mfhi	$t0
	$ADDU	$v0,$at
	sltu	$t1,$v0,$at
	$MULTU	$t2,$a3
	$ST	$v0,0($a0)
	$ADDU	$v0,$t1,$t0

	subu	$a2,4
	$PTR_ADD $a0,4*$BNSZ
	$PTR_ADD $a1,4*$BNSZ
	mflo	$at
	mfhi	$t2
	$ADDU	$v0,$at
	sltu	$t3,$v0,$at
	$MULTU	$ta0,$a3
	$ST	$v0,-3*$BNSZ($a0)
	$ADDU	$v0,$t3,$t2

	mflo	$at
	mfhi	$ta0
	$ADDU	$v0,$at
	sltu	$ta1,$v0,$at
	$MULTU	$ta2,$a3
	$ST	$v0,-2*$BNSZ($a0)
	$ADDU	$v0,$ta1,$ta0

	and	$ta0,$a2,$minus4
	mflo	$at
	mfhi	$ta2
	$ADDU	$v0,$at
	sltu	$ta3,$v0,$at
	$ST	$v0,-$BNSZ($a0)
	.set	noreorder
	bgtz	$ta0,.L_bn_mul_words_loop
	$ADDU	$v0,$ta3,$ta2

	beqz	$a2,.L_bn_mul_words_return
	nop

.L_bn_mul_words_tail:
	.set	reorder
	$LD	$t0,0($a1)
	$MULTU	$t0,$a3
	subu	$a2,1
	mflo	$at
	mfhi	$t0
	$ADDU	$v0,$at
	sltu	$t1,$v0,$at
	$ST	$v0,0($a0)
	$ADDU	$v0,$t1,$t0
	beqz	$a2,.L_bn_mul_words_return

	$LD	$t0,$BNSZ($a1)
	$MULTU	$t0,$a3
	subu	$a2,1
	mflo	$at
	mfhi	$t0
	$ADDU	$v0,$at
	sltu	$t1,$v0,$at
	$ST	$v0,$BNSZ($a0)
	$ADDU	$v0,$t1,$t0
	beqz	$a2,.L_bn_mul_words_return

	$LD	$t0,2*$BNSZ($a1)
	$MULTU	$t0,$a3
	mflo	$at
	mfhi	$t0
	$ADDU	$v0,$at
	sltu	$t1,$v0,$at
	$ST	$v0,2*$BNSZ($a0)
	$ADDU	$v0,$t1,$t0

.L_bn_mul_words_return:
	.set	noreorder
___
$code.=$nubi_epilogue;
$code.=<<___;
	jr	$ra
	move	$a0,$v0
.end	bn_mul_words_internal
___

# ---------------------------------------------------------------------
# bn_sqr_words
#   $a0 = destination array, $a1 = source array, $a2 = word count.
#   For each source word the low and high halves of its square are
#   stored to two consecutive destination words, so the destination
#   pointer advances twice as fast as the source pointer.
# ---------------------------------------------------------------------
$code.=<<___;

.align	5
.globl	bn_sqr_words
.ent	bn_sqr_words
bn_sqr_words:
	.set	noreorder
	bgtz	$a2,bn_sqr_words_internal
	move	$v0,$zero
	jr	$ra
	move	$a0,$v0
.end	bn_sqr_words

.align	5
.ent	bn_sqr_words_internal
bn_sqr_words_internal:
___
$code.=$nubi_prologue;
$code.=<<___;
	.set	reorder
	li	$minus4,-4
	and	$ta0,$a2,$minus4
	beqz	$ta0,.L_bn_sqr_words_tail

.L_bn_sqr_words_loop:
	$LD	$t0,0($a1)
	$MULTU	$t0,$t0
	$LD	$t2,$BNSZ($a1)
	$LD	$ta0,2*$BNSZ($a1)
	$LD	$ta2,3*$BNSZ($a1)
	mflo	$t1
	mfhi	$t0
	$ST	$t1,0($a0)
	$ST	$t0,$BNSZ($a0)

	$MULTU	$t2,$t2
	subu	$a2,4
	$PTR_ADD $a0,8*$BNSZ
	$PTR_ADD $a1,4*$BNSZ
	mflo	$t3
	mfhi	$t2
	$ST	$t3,-6*$BNSZ($a0)
	$ST	$t2,-5*$BNSZ($a0)

	$MULTU	$ta0,$ta0
	mflo	$ta1
	mfhi	$ta0
	$ST	$ta1,-4*$BNSZ($a0)
	$ST	$ta0,-3*$BNSZ($a0)


	$MULTU	$ta2,$ta2
	and	$ta0,$a2,$minus4
	mflo	$ta3
	mfhi	$ta2
	$ST	$ta3,-2*$BNSZ($a0)

	.set	noreorder
	bgtz	$ta0,.L_bn_sqr_words_loop
	$ST	$ta2,-$BNSZ($a0)

	beqz	$a2,.L_bn_sqr_words_return
	nop

.L_bn_sqr_words_tail:
	.set	reorder
	$LD	$t0,0($a1)
	$MULTU	$t0,$t0
	subu	$a2,1
	mflo	$t1
	mfhi	$t0
	$ST	$t1,0($a0)
	$ST	$t0,$BNSZ($a0)
	beqz	$a2,.L_bn_sqr_words_return

	$LD	$t0,$BNSZ($a1)
	$MULTU	$t0,$t0
	subu	$a2,1
	mflo	$t1
	mfhi	$t0
	$ST	$t1,2*$BNSZ($a0)
	$ST	$t0,3*$BNSZ($a0)
	beqz	$a2,.L_bn_sqr_words_return

	$LD	$t0,2*$BNSZ($a1)
	$MULTU	$t0,$t0
	mflo	$t1
	mfhi	$t0
	$ST	$t1,4*$BNSZ($a0)
	$ST	$t0,5*$BNSZ($a0)

.L_bn_sqr_words_return:
	.set	noreorder
___
$code.=$nubi_epilogue;
$code.=<<___;
	jr	$ra
	move	$a0,$v0

.end	bn_sqr_words_internal
___

# ---------------------------------------------------------------------
# bn_add_words
#   $a0 = destination array, $a1 and $a2 = operand arrays,
#   $a3 = word count.
#   Word-wise addition with carry propagation; the carry out of the last
#   word is returned in $v0. The public entry returns 0 when the count
#   is not positive.
# ---------------------------------------------------------------------
$code.=<<___;

.align	5
.globl	bn_add_words
.ent	bn_add_words
bn_add_words:
	.set	noreorder
	bgtz	$a3,bn_add_words_internal
	move	$v0,$zero
	jr	$ra
	move	$a0,$v0
.end	bn_add_words

.align	5
.ent	bn_add_words_internal
bn_add_words_internal:
___
$code.=$nubi_prologue;
$code.=<<___;
	.set	reorder
	li	$minus4,-4
	and	$at,$a3,$minus4
	beqz	$at,.L_bn_add_words_tail

.L_bn_add_words_loop:
	$LD	$t0,0($a1)
	$LD	$ta0,0($a2)
	subu	$a3,4
	$LD	$t1,$BNSZ($a1)
	and	$at,$a3,$minus4
	$LD	$t2,2*$BNSZ($a1)
	$PTR_ADD $a2,4*$BNSZ
	$LD	$t3,3*$BNSZ($a1)
	$PTR_ADD $a0,4*$BNSZ
	$LD	$ta1,-3*$BNSZ($a2)
	$PTR_ADD $a1,4*$BNSZ
	$LD	$ta2,-2*$BNSZ($a2)
	$LD	$ta3,-$BNSZ($a2)
	$ADDU	$ta0,$t0
	sltu	$t8,$ta0,$t0
	$ADDU	$t0,$ta0,$v0
	sltu	$v0,$t0,$ta0
	$ST	$t0,-4*$BNSZ($a0)
	$ADDU	$v0,$t8

	$ADDU	$ta1,$t1
	sltu	$t9,$ta1,$t1
	$ADDU	$t1,$ta1,$v0
	sltu	$v0,$t1,$ta1
	$ST	$t1,-3*$BNSZ($a0)
	$ADDU	$v0,$t9

	$ADDU	$ta2,$t2
	sltu	$t8,$ta2,$t2
	$ADDU	$t2,$ta2,$v0
	sltu	$v0,$t2,$ta2
	$ST	$t2,-2*$BNSZ($a0)
	$ADDU	$v0,$t8

	$ADDU	$ta3,$t3
	sltu	$t9,$ta3,$t3
	$ADDU	$t3,$ta3,$v0
	sltu	$v0,$t3,$ta3
	$ST	$t3,-$BNSZ($a0)

	.set	noreorder
	bgtz	$at,.L_bn_add_words_loop
	$ADDU	$v0,$t9

	beqz	$a3,.L_bn_add_words_return
	nop

.L_bn_add_words_tail:
	.set	reorder
	$LD	$t0,0($a1)
	$LD	$ta0,0($a2)
	$ADDU	$ta0,$t0
	subu	$a3,1
	sltu	$t8,$ta0,$t0
	$ADDU	$t0,$ta0,$v0
	sltu	$v0,$t0,$ta0
	$ST	$t0,0($a0)
	$ADDU	$v0,$t8
	beqz	$a3,.L_bn_add_words_return

	$LD	$t1,$BNSZ($a1)
	$LD	$ta1,$BNSZ($a2)
	$ADDU	$ta1,$t1
	subu	$a3,1
	sltu	$t9,$ta1,$t1
	$ADDU	$t1,$ta1,$v0
	sltu	$v0,$t1,$ta1
	$ST	$t1,$BNSZ($a0)
	$ADDU	$v0,$t9
	beqz	$a3,.L_bn_add_words_return

	$LD	$t2,2*$BNSZ($a1)
	$LD	$ta2,2*$BNSZ($a2)
	$ADDU	$ta2,$t2
	sltu	$t8,$ta2,$t2
	$ADDU	$t2,$ta2,$v0
	sltu	$v0,$t2,$ta2
	$ST	$t2,2*$BNSZ($a0)
	$ADDU	$v0,$t8

.L_bn_add_words_return:
	.set	noreorder
___
$code.=$nubi_epilogue;
$code.=<<___;
	jr	$ra
	move	$a0,$v0

.end	bn_add_words_internal
___

# ---------------------------------------------------------------------
# bn_sub_words
#   $a0 = destination array, $a1 = minuend array, $a2 = subtrahend
#   array, $a3 = word count.
#   Word-wise subtraction ($a1 word - $a2 word - borrow); the borrow out
#   of the last word is returned in $v0. The public entry returns 0 when
#   the count is not positive.
# ---------------------------------------------------------------------
$code.=<<___;

.align	5
.globl	bn_sub_words
.ent	bn_sub_words
bn_sub_words:
	.set	noreorder
	bgtz	$a3,bn_sub_words_internal
	move	$v0,$zero
	jr	$ra
	move	$a0,$zero
.end	bn_sub_words

.align	5
.ent	bn_sub_words_internal
bn_sub_words_internal:
___
$code.=$nubi_prologue;
$code.=<<___;
	.set	reorder
	li	$minus4,-4
	and	$at,$a3,$minus4
	beqz	$at,.L_bn_sub_words_tail

.L_bn_sub_words_loop:
	$LD	$t0,0($a1)
	$LD	$ta0,0($a2)
	subu	$a3,4
	$LD	$t1,$BNSZ($a1)
	and	$at,$a3,$minus4
	$LD	$t2,2*$BNSZ($a1)
	$PTR_ADD $a2,4*$BNSZ
	$LD	$t3,3*$BNSZ($a1)
	$PTR_ADD $a0,4*$BNSZ
	$LD	$ta1,-3*$BNSZ($a2)
	$PTR_ADD $a1,4*$BNSZ
	$LD	$ta2,-2*$BNSZ($a2)
	$LD	$ta3,-$BNSZ($a2)
	sltu	$t8,$t0,$ta0
	$SUBU	$ta0,$t0,$ta0
	$SUBU	$t0,$ta0,$v0
	sgtu	$v0,$t0,$ta0
	$ST	$t0,-4*$BNSZ($a0)
	$ADDU	$v0,$t8

	sltu	$t9,$t1,$ta1
	$SUBU	$ta1,$t1,$ta1
	$SUBU	$t1,$ta1,$v0
	sgtu	$v0,$t1,$ta1
	$ST	$t1,-3*$BNSZ($a0)
	$ADDU	$v0,$t9


	sltu	$t8,$t2,$ta2
	$SUBU	$ta2,$t2,$ta2
	$SUBU	$t2,$ta2,$v0
	sgtu	$v0,$t2,$ta2
	$ST	$t2,-2*$BNSZ($a0)
	$ADDU	$v0,$t8

	sltu	$t9,$t3,$ta3
	$SUBU	$ta3,$t3,$ta3
	$SUBU	$t3,$ta3,$v0
	sgtu	$v0,$t3,$ta3
	$ST	$t3,-$BNSZ($a0)

	.set	noreorder
	bgtz	$at,.L_bn_sub_words_loop
	$ADDU	$v0,$t9

	beqz	$a3,.L_bn_sub_words_return
	nop

.L_bn_sub_words_tail:
	.set	reorder
	$LD	$t0,0($a1)
	$LD	$ta0,0($a2)
	subu	$a3,1
	sltu	$t8,$t0,$ta0
	$SUBU	$ta0,$t0,$ta0
	$SUBU	$t0,$ta0,$v0
	sgtu	$v0,$t0,$ta0
	$ST	$t0,0($a0)
	$ADDU	$v0,$t8
	beqz	$a3,.L_bn_sub_words_return

	$LD	$t1,$BNSZ($a1)
	subu	$a3,1
	$LD	$ta1,$BNSZ($a2)
	sltu	$t9,$t1,$ta1
	$SUBU	$ta1,$t1,$ta1
	$SUBU	$t1,$ta1,$v0
	sgtu	$v0,$t1,$ta1
	$ST	$t1,$BNSZ($a0)
	$ADDU	$v0,$t9
	beqz	$a3,.L_bn_sub_words_return

	$LD	$t2,2*$BNSZ($a1)
	$LD	$ta2,2*$BNSZ($a2)
	sltu	$t8,$t2,$ta2
	$SUBU	$ta2,$t2,$ta2
	$SUBU	$t2,$ta2,$v0
	sgtu	$v0,$t2,$ta2
	$ST	$t2,2*$BNSZ($a0)
	$ADDU	$v0,$t8

.L_bn_sub_words_return:
	.set	noreorder
___
$code.=$nubi_epilogue;
$code.=<<___;
	jr	$ra
	move	$a0,$v0
.end	bn_sub_words_internal
___

# ---------------------------------------------------------------------
# bn_div_3_words
#   $a0 = pointer into a word array, $a1 and $a2 = divisor words.
#   Returns -1 in $v0 when the word at ($a0) equals $a2. Otherwise it
#   calls bn_div_words_internal on the words at ($a0) and -$BNSZ($a0)
#   with divisor $a2, then adjusts the quotient estimate in the inner
#   loop using $a1 and the word at -2*$BNSZ($a0).
#   Arguments and the return address are parked in $a3, $ta2 and $ta3
#   across the call (see the comment emitted below).
# ---------------------------------------------------------------------
$code.=<<___;

.align	5
.globl	bn_div_3_words
.ent	bn_div_3_words
bn_div_3_words:
	.set	noreorder
	move	$a3,$a0		# we know that bn_div_words does not
				# touch $a3, $ta2, $ta3 and preserves $a2
				# so that we can save two arguments
				# and return address in registers
				# instead of stack:-)

	$LD	$a0,($a3)
	move	$ta2,$a1
	bne	$a0,$a2,bn_div_3_words_internal
	$LD	$a1,-$BNSZ($a3)
	li	$v0,-1
	jr	$ra
	move	$a0,$v0
.end	bn_div_3_words

.align	5
.ent	bn_div_3_words_internal
bn_div_3_words_internal:
___
$code.=$nubi_prologue;
$code.=<<___;
	.set	reorder
	move	$ta3,$ra
	bal	bn_div_words_internal
	move	$ra,$ta3
	$MULTU	$ta2,$v0
	$LD	$t2,-2*$BNSZ($a3)
	move	$ta0,$zero
	mfhi	$t1
	mflo	$t0
	sltu	$t8,$t1,$a1
.L_bn_div_3_words_inner_loop:
	bnez	$t8,.L_bn_div_3_words_inner_loop_done
	sgeu	$at,$t2,$t0
	seq	$t9,$t1,$a1
	and	$at,$t9
	sltu	$t3,$t0,$ta2
	$ADDU	$a1,$a2
	$SUBU	$t1,$t3
	$SUBU	$t0,$ta2
	sltu	$t8,$t1,$a1
	sltu	$ta0,$a1,$a2
	or	$t8,$ta0
	.set	noreorder
	beqz	$at,.L_bn_div_3_words_inner_loop
	$SUBU	$v0,1
	$ADDU	$v0,1
	.set	reorder
.L_bn_div_3_words_inner_loop_done:
	.set	noreorder
___
$code.=$nubi_epilogue;
$code.=<<___;
	jr	$ra
	move	$a0,$v0
.end	bn_div_3_words_internal
___

# ---------------------------------------------------------------------
# bn_div_words
#   $a0 = high word, $a1 = low word of the dividend, $a2 = divisor.
#   The public entry returns -1 when the divisor is zero. The internal
#   routine shifts the divisor left until its top bit is set (shift
#   count kept in $t9), raises 'break 6' if the dividend bits shifted
#   out are non-zero, then forms the quotient in two half-word steps.
#   Result: quotient in $v0 (copied to $a0), remainder in $v1 (copied
#   to $a1); $a2 is shifted back before returning.
# ---------------------------------------------------------------------
$code.=<<___;

.align	5
.globl	bn_div_words
.ent	bn_div_words
bn_div_words:
	.set	noreorder
	bnez	$a2,bn_div_words_internal
	li	$v0,-1		# I would rather signal div-by-zero
				# which can be done with 'break 7'
	jr	$ra
	move	$a0,$v0
.end	bn_div_words

.align	5
.ent	bn_div_words_internal
bn_div_words_internal:
___
$code.=$nubi_prologue;
$code.=<<___;
	move	$v1,$zero
	bltz	$a2,.L_bn_div_words_body
	move	$t9,$v1
	$SLL	$a2,1
	bgtz	$a2,.-4
	addu	$t9,1

	.set	reorder
	negu	$t1,$t9
	li	$t2,-1
	$SLL	$t2,$t1
	and	$t2,$a0
	$SRL	$at,$a1,$t1
	.set	noreorder
	beqz	$t2,.+12
	nop
	break	6		# signal overflow
	.set	reorder
	$SLL	$a0,$t9
	$SLL	$a1,$t9
	or	$a0,$at
___
# Symbolic names used only inside the division body:
#   $QT = current quotient half-word estimate,
#   $HH = high half of the running dividend,
#   $DH = high half of the divisor.
$QT=$ta0;
$HH=$ta1;
$DH=$v1;
$code.=<<___;
.L_bn_div_words_body:
	$SRL	$DH,$a2,4*$BNSZ	# bits
	sgeu	$at,$a0,$a2
	.set	noreorder
	beqz	$at,.+12
	nop
	$SUBU	$a0,$a2
	.set	reorder

	li	$QT,-1
	$SRL	$HH,$a0,4*$BNSZ	# bits
	$SRL	$QT,4*$BNSZ	# q=0xffffffff
	beq	$DH,$HH,.L_bn_div_words_skip_div1
	$DIVU	$zero,$a0,$DH
	mflo	$QT
.L_bn_div_words_skip_div1:
	$MULTU	$a2,$QT
	$SLL	$t3,$a0,4*$BNSZ	# bits
	$SRL	$at,$a1,4*$BNSZ	# bits
	or	$t3,$at
	mflo	$t0
	mfhi	$t1
.L_bn_div_words_inner_loop1:
	sltu	$t2,$t3,$t0
	seq	$t8,$HH,$t1
	sltu	$at,$HH,$t1
	and	$t2,$t8
	sltu	$v0,$t0,$a2
	or	$at,$t2
	.set	noreorder
	beqz	$at,.L_bn_div_words_inner_loop1_done
	$SUBU	$t1,$v0
	$SUBU	$t0,$a2
	b	.L_bn_div_words_inner_loop1
	$SUBU	$QT,1
	.set	reorder
.L_bn_div_words_inner_loop1_done:

	$SLL	$a1,4*$BNSZ	# bits
	$SUBU	$a0,$t3,$t0
	$SLL	$v0,$QT,4*$BNSZ	# bits

	li	$QT,-1
	$SRL	$HH,$a0,4*$BNSZ	# bits
	$SRL	$QT,4*$BNSZ	# q=0xffffffff
	beq	$DH,$HH,.L_bn_div_words_skip_div2
	$DIVU	$zero,$a0,$DH
	mflo	$QT
.L_bn_div_words_skip_div2:
	$MULTU	$a2,$QT
	$SLL	$t3,$a0,4*$BNSZ	# bits
	$SRL	$at,$a1,4*$BNSZ	# bits
	or	$t3,$at
	mflo	$t0
	mfhi	$t1
.L_bn_div_words_inner_loop2:
	sltu	$t2,$t3,$t0
	seq	$t8,$HH,$t1
	sltu	$at,$HH,$t1
	and	$t2,$t8
	sltu	$v1,$t0,$a2
	or	$at,$t2
	.set	noreorder
	beqz	$at,.L_bn_div_words_inner_loop2_done
	$SUBU	$t1,$v1
	$SUBU	$t0,$a2
	b	.L_bn_div_words_inner_loop2
	$SUBU	$QT,1
	.set	reorder
.L_bn_div_words_inner_loop2_done:

	$SUBU	$a0,$t3,$t0
	or	$v0,$QT
	$SRL	$v1,$a0,$t9	# $v1 contains remainder if anybody wants it
	$SRL	$a2,$t9		# restore $a2

	.set	noreorder
	move	$a1,$v1
___
$code.=$nubi_epilogue;
$code.=<<___;
	jr	$ra
	move	$a0,$v0
.end	bn_div_words_internal
___
undef $HH; undef $QT; undef $DH;

($a_0,$a_1,$a_2,$a_3)=($t0,$t1,$t2,$t3);
($b_0,$b_1,$b_2,$b_3)=($ta0,$ta1,$ta2,$ta3);

($a_4,$a_5,$a_6,$a_7)=($s0,$s2,$s4,$a1); # once we load a[7], no use for $a1
($b_4,$b_5,$b_6,$b_7)=($s1,$s3,$s5,$a2); # once we load b[7], no use for $a2

($t_1,$t_2,$c_1,$c_2,$c_3)=($t8,$t9,$v0,$v1,$a3);

$code.=<<___;

.align	5
.globl	bn_mul_comba8
.ent	bn_mul_comba8
bn_mul_comba8:
	.set	noreorder
___
$code.=<<___ if ($flavour =~ /nubi/i);
	.frame	$sp,12*$SZREG,$ra
	.mask	0x803ff008,-$SZREG
	$PTR_SUB $sp,12*$SZREG
	$REG_S	$ra,11*$SZREG($sp)
	$REG_S	$s5,10*$SZREG($sp)
	$REG_S	$s4,9*$SZREG($sp)
	$REG_S	$s3,8*$SZREG($sp)
	$REG_S	$s2,7*$SZREG($sp)
	$REG_S	$s1,6*$SZREG($sp)
	$REG_S	$s0,5*$SZREG($sp)
	$REG_S	$t3,4*$SZREG($sp)
	$REG_S	$t2,3*$SZREG($sp)
	$REG_S	$t1,2*$SZREG($sp)
	$REG_S	$t0,1*$SZREG($sp)
	$REG_S	$gp,0*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /nubi/i);
	.frame	$sp,6*$SZREG,$ra
	.mask	0x003f0000,-$SZREG
	$PTR_SUB $sp,6*$SZREG
$REG_S $s5,5*$SZREG($sp) 1045ec07fdf1Sdjm $REG_S $s4,4*$SZREG($sp) 1046ec07fdf1Sdjm $REG_S $s3,3*$SZREG($sp) 1047ec07fdf1Sdjm $REG_S $s2,2*$SZREG($sp) 1048ec07fdf1Sdjm $REG_S $s1,1*$SZREG($sp) 1049ec07fdf1Sdjm $REG_S $s0,0*$SZREG($sp) 1050ec07fdf1Sdjm___ 1051ec07fdf1Sdjm$code.=<<___; 1052ec07fdf1Sdjm 1053ec07fdf1Sdjm .set reorder 1054ec07fdf1Sdjm $LD $a_0,0($a1) # If compiled with -mips3 option on 1055ec07fdf1Sdjm # R5000 box assembler barks on this 1056ec07fdf1Sdjm # 1ine with "should not have mult/div 1057ec07fdf1Sdjm # as last instruction in bb (R10K 1058ec07fdf1Sdjm # bug)" warning. If anybody out there 1059ec07fdf1Sdjm # has a clue about how to circumvent 1060ec07fdf1Sdjm # this do send me a note. 1061ec07fdf1Sdjm # <appro\@fy.chalmers.se> 1062ec07fdf1Sdjm 1063ec07fdf1Sdjm $LD $b_0,0($a2) 1064ec07fdf1Sdjm $LD $a_1,$BNSZ($a1) 1065ec07fdf1Sdjm $LD $a_2,2*$BNSZ($a1) 1066ec07fdf1Sdjm $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); 1067ec07fdf1Sdjm $LD $a_3,3*$BNSZ($a1) 1068ec07fdf1Sdjm $LD $b_1,$BNSZ($a2) 1069ec07fdf1Sdjm $LD $b_2,2*$BNSZ($a2) 1070ec07fdf1Sdjm $LD $b_3,3*$BNSZ($a2) 1071ec07fdf1Sdjm mflo $c_1 1072ec07fdf1Sdjm mfhi $c_2 1073ec07fdf1Sdjm 1074ec07fdf1Sdjm $LD $a_4,4*$BNSZ($a1) 1075ec07fdf1Sdjm $LD $a_5,5*$BNSZ($a1) 1076ec07fdf1Sdjm $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); 1077ec07fdf1Sdjm $LD $a_6,6*$BNSZ($a1) 1078ec07fdf1Sdjm $LD $a_7,7*$BNSZ($a1) 1079ec07fdf1Sdjm $LD $b_4,4*$BNSZ($a2) 1080ec07fdf1Sdjm $LD $b_5,5*$BNSZ($a2) 1081ec07fdf1Sdjm mflo $t_1 1082ec07fdf1Sdjm mfhi $t_2 1083ec07fdf1Sdjm $ADDU $c_2,$t_1 1084ec07fdf1Sdjm sltu $at,$c_2,$t_1 1085ec07fdf1Sdjm $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); 1086ec07fdf1Sdjm $ADDU $c_3,$t_2,$at 1087ec07fdf1Sdjm $LD $b_6,6*$BNSZ($a2) 1088ec07fdf1Sdjm $LD $b_7,7*$BNSZ($a2) 1089ec07fdf1Sdjm $ST $c_1,0($a0) # r[0]=c1; 1090ec07fdf1Sdjm mflo $t_1 1091ec07fdf1Sdjm mfhi $t_2 1092ec07fdf1Sdjm $ADDU $c_2,$t_1 1093ec07fdf1Sdjm sltu $at,$c_2,$t_1 1094ec07fdf1Sdjm $MULTU $a_2,$b_0 # 
mul_add_c(a[2],b[0],c3,c1,c2); 1095ec07fdf1Sdjm $ADDU $t_2,$at 1096ec07fdf1Sdjm $ADDU $c_3,$t_2 1097ec07fdf1Sdjm sltu $c_1,$c_3,$t_2 1098ec07fdf1Sdjm $ST $c_2,$BNSZ($a0) # r[1]=c2; 1099ec07fdf1Sdjm 1100ec07fdf1Sdjm mflo $t_1 1101ec07fdf1Sdjm mfhi $t_2 1102ec07fdf1Sdjm $ADDU $c_3,$t_1 1103ec07fdf1Sdjm sltu $at,$c_3,$t_1 1104ec07fdf1Sdjm $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); 1105ec07fdf1Sdjm $ADDU $t_2,$at 1106ec07fdf1Sdjm $ADDU $c_1,$t_2 1107ec07fdf1Sdjm mflo $t_1 1108ec07fdf1Sdjm mfhi $t_2 1109ec07fdf1Sdjm $ADDU $c_3,$t_1 1110ec07fdf1Sdjm sltu $at,$c_3,$t_1 1111ec07fdf1Sdjm $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); 1112ec07fdf1Sdjm $ADDU $t_2,$at 1113ec07fdf1Sdjm $ADDU $c_1,$t_2 1114ec07fdf1Sdjm sltu $c_2,$c_1,$t_2 1115ec07fdf1Sdjm mflo $t_1 1116ec07fdf1Sdjm mfhi $t_2 1117ec07fdf1Sdjm $ADDU $c_3,$t_1 1118ec07fdf1Sdjm sltu $at,$c_3,$t_1 1119ec07fdf1Sdjm $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); 1120ec07fdf1Sdjm $ADDU $t_2,$at 1121ec07fdf1Sdjm $ADDU $c_1,$t_2 1122ec07fdf1Sdjm sltu $at,$c_1,$t_2 1123ec07fdf1Sdjm $ADDU $c_2,$at 1124ec07fdf1Sdjm $ST $c_3,2*$BNSZ($a0) # r[2]=c3; 1125ec07fdf1Sdjm 1126ec07fdf1Sdjm mflo $t_1 1127ec07fdf1Sdjm mfhi $t_2 1128ec07fdf1Sdjm $ADDU $c_1,$t_1 1129ec07fdf1Sdjm sltu $at,$c_1,$t_1 1130ec07fdf1Sdjm $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); 1131ec07fdf1Sdjm $ADDU $t_2,$at 1132ec07fdf1Sdjm $ADDU $c_2,$t_2 1133ec07fdf1Sdjm sltu $c_3,$c_2,$t_2 1134ec07fdf1Sdjm mflo $t_1 1135ec07fdf1Sdjm mfhi $t_2 1136ec07fdf1Sdjm $ADDU $c_1,$t_1 1137ec07fdf1Sdjm sltu $at,$c_1,$t_1 1138ec07fdf1Sdjm $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); 1139ec07fdf1Sdjm $ADDU $t_2,$at 1140ec07fdf1Sdjm $ADDU $c_2,$t_2 1141ec07fdf1Sdjm sltu $at,$c_2,$t_2 1142ec07fdf1Sdjm $ADDU $c_3,$at 1143ec07fdf1Sdjm mflo $t_1 1144ec07fdf1Sdjm mfhi $t_2 1145ec07fdf1Sdjm $ADDU $c_1,$t_1 1146ec07fdf1Sdjm sltu $at,$c_1,$t_1 1147ec07fdf1Sdjm $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); 1148ec07fdf1Sdjm $ADDU $t_2,$at 1149ec07fdf1Sdjm $ADDU 
$c_2,$t_2 1150ec07fdf1Sdjm sltu $at,$c_2,$t_2 1151ec07fdf1Sdjm $ADDU $c_3,$at 1152ec07fdf1Sdjm mflo $t_1 1153ec07fdf1Sdjm mfhi $t_2 1154ec07fdf1Sdjm $ADDU $c_1,$t_1 1155ec07fdf1Sdjm sltu $at,$c_1,$t_1 1156ec07fdf1Sdjm $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1); 1157ec07fdf1Sdjm $ADDU $t_2,$at 1158ec07fdf1Sdjm $ADDU $c_2,$t_2 1159ec07fdf1Sdjm sltu $at,$c_2,$t_2 1160ec07fdf1Sdjm $ADDU $c_3,$at 1161ec07fdf1Sdjm $ST $c_1,3*$BNSZ($a0) # r[3]=c1; 1162ec07fdf1Sdjm 1163ec07fdf1Sdjm mflo $t_1 1164ec07fdf1Sdjm mfhi $t_2 1165ec07fdf1Sdjm $ADDU $c_2,$t_1 1166ec07fdf1Sdjm sltu $at,$c_2,$t_1 1167ec07fdf1Sdjm $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); 1168ec07fdf1Sdjm $ADDU $t_2,$at 1169ec07fdf1Sdjm $ADDU $c_3,$t_2 1170ec07fdf1Sdjm sltu $c_1,$c_3,$t_2 1171ec07fdf1Sdjm mflo $t_1 1172ec07fdf1Sdjm mfhi $t_2 1173ec07fdf1Sdjm $ADDU $c_2,$t_1 1174ec07fdf1Sdjm sltu $at,$c_2,$t_1 1175ec07fdf1Sdjm $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); 1176ec07fdf1Sdjm $ADDU $t_2,$at 1177ec07fdf1Sdjm $ADDU $c_3,$t_2 1178ec07fdf1Sdjm sltu $at,$c_3,$t_2 1179ec07fdf1Sdjm $ADDU $c_1,$at 1180ec07fdf1Sdjm mflo $t_1 1181ec07fdf1Sdjm mfhi $t_2 1182ec07fdf1Sdjm $ADDU $c_2,$t_1 1183ec07fdf1Sdjm sltu $at,$c_2,$t_1 1184ec07fdf1Sdjm $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); 1185ec07fdf1Sdjm $ADDU $t_2,$at 1186ec07fdf1Sdjm $ADDU $c_3,$t_2 1187ec07fdf1Sdjm sltu $at,$c_3,$t_2 1188ec07fdf1Sdjm $ADDU $c_1,$at 1189ec07fdf1Sdjm mflo $t_1 1190ec07fdf1Sdjm mfhi $t_2 1191ec07fdf1Sdjm $ADDU $c_2,$t_1 1192ec07fdf1Sdjm sltu $at,$c_2,$t_1 1193ec07fdf1Sdjm $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1); 1194ec07fdf1Sdjm $ADDU $t_2,$at 1195ec07fdf1Sdjm $ADDU $c_3,$t_2 1196ec07fdf1Sdjm sltu $at,$c_3,$t_2 1197ec07fdf1Sdjm $ADDU $c_1,$at 1198ec07fdf1Sdjm mflo $t_1 1199ec07fdf1Sdjm mfhi $t_2 1200ec07fdf1Sdjm $ADDU $c_2,$t_1 1201ec07fdf1Sdjm sltu $at,$c_2,$t_1 1202ec07fdf1Sdjm $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2); 1203ec07fdf1Sdjm $ADDU $t_2,$at 1204ec07fdf1Sdjm $ADDU $c_3,$t_2 
1205ec07fdf1Sdjm sltu $at,$c_3,$t_2 1206ec07fdf1Sdjm $ADDU $c_1,$at 1207ec07fdf1Sdjm $ST $c_2,4*$BNSZ($a0) # r[4]=c2; 1208ec07fdf1Sdjm 1209ec07fdf1Sdjm mflo $t_1 1210ec07fdf1Sdjm mfhi $t_2 1211ec07fdf1Sdjm $ADDU $c_3,$t_1 1212ec07fdf1Sdjm sltu $at,$c_3,$t_1 1213ec07fdf1Sdjm $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2); 1214ec07fdf1Sdjm $ADDU $t_2,$at 1215ec07fdf1Sdjm $ADDU $c_1,$t_2 1216ec07fdf1Sdjm sltu $c_2,$c_1,$t_2 1217ec07fdf1Sdjm mflo $t_1 1218ec07fdf1Sdjm mfhi $t_2 1219ec07fdf1Sdjm $ADDU $c_3,$t_1 1220ec07fdf1Sdjm sltu $at,$c_3,$t_1 1221ec07fdf1Sdjm $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); 1222ec07fdf1Sdjm $ADDU $t_2,$at 1223ec07fdf1Sdjm $ADDU $c_1,$t_2 1224ec07fdf1Sdjm sltu $at,$c_1,$t_2 1225ec07fdf1Sdjm $ADDU $c_2,$at 1226ec07fdf1Sdjm mflo $t_1 1227ec07fdf1Sdjm mfhi $t_2 1228ec07fdf1Sdjm $ADDU $c_3,$t_1 1229ec07fdf1Sdjm sltu $at,$c_3,$t_1 1230ec07fdf1Sdjm $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); 1231ec07fdf1Sdjm $ADDU $t_2,$at 1232ec07fdf1Sdjm $ADDU $c_1,$t_2 1233ec07fdf1Sdjm sltu $at,$c_1,$t_2 1234ec07fdf1Sdjm $ADDU $c_2,$at 1235ec07fdf1Sdjm mflo $t_1 1236ec07fdf1Sdjm mfhi $t_2 1237ec07fdf1Sdjm $ADDU $c_3,$t_1 1238ec07fdf1Sdjm sltu $at,$c_3,$t_1 1239ec07fdf1Sdjm $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2); 1240ec07fdf1Sdjm $ADDU $t_2,$at 1241ec07fdf1Sdjm $ADDU $c_1,$t_2 1242ec07fdf1Sdjm sltu $at,$c_1,$t_2 1243ec07fdf1Sdjm $ADDU $c_2,$at 1244ec07fdf1Sdjm mflo $t_1 1245ec07fdf1Sdjm mfhi $t_2 1246ec07fdf1Sdjm $ADDU $c_3,$t_1 1247ec07fdf1Sdjm sltu $at,$c_3,$t_1 1248ec07fdf1Sdjm $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2); 1249ec07fdf1Sdjm $ADDU $t_2,$at 1250ec07fdf1Sdjm $ADDU $c_1,$t_2 1251ec07fdf1Sdjm sltu $at,$c_1,$t_2 1252ec07fdf1Sdjm $ADDU $c_2,$at 1253ec07fdf1Sdjm mflo $t_1 1254ec07fdf1Sdjm mfhi $t_2 1255ec07fdf1Sdjm $ADDU $c_3,$t_1 1256ec07fdf1Sdjm sltu $at,$c_3,$t_1 1257ec07fdf1Sdjm $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3); 1258ec07fdf1Sdjm $ADDU $t_2,$at 1259ec07fdf1Sdjm $ADDU $c_1,$t_2 1260ec07fdf1Sdjm sltu 
$at,$c_1,$t_2 1261ec07fdf1Sdjm $ADDU $c_2,$at 1262ec07fdf1Sdjm $ST $c_3,5*$BNSZ($a0) # r[5]=c3; 1263ec07fdf1Sdjm 1264ec07fdf1Sdjm mflo $t_1 1265ec07fdf1Sdjm mfhi $t_2 1266ec07fdf1Sdjm $ADDU $c_1,$t_1 1267ec07fdf1Sdjm sltu $at,$c_1,$t_1 1268ec07fdf1Sdjm $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3); 1269ec07fdf1Sdjm $ADDU $t_2,$at 1270ec07fdf1Sdjm $ADDU $c_2,$t_2 1271ec07fdf1Sdjm sltu $c_3,$c_2,$t_2 1272ec07fdf1Sdjm mflo $t_1 1273ec07fdf1Sdjm mfhi $t_2 1274ec07fdf1Sdjm $ADDU $c_1,$t_1 1275ec07fdf1Sdjm sltu $at,$c_1,$t_1 1276ec07fdf1Sdjm $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3); 1277ec07fdf1Sdjm $ADDU $t_2,$at 1278ec07fdf1Sdjm $ADDU $c_2,$t_2 1279ec07fdf1Sdjm sltu $at,$c_2,$t_2 1280ec07fdf1Sdjm $ADDU $c_3,$at 1281ec07fdf1Sdjm mflo $t_1 1282ec07fdf1Sdjm mfhi $t_2 1283ec07fdf1Sdjm $ADDU $c_1,$t_1 1284ec07fdf1Sdjm sltu $at,$c_1,$t_1 1285ec07fdf1Sdjm $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); 1286ec07fdf1Sdjm $ADDU $t_2,$at 1287ec07fdf1Sdjm $ADDU $c_2,$t_2 1288ec07fdf1Sdjm sltu $at,$c_2,$t_2 1289ec07fdf1Sdjm $ADDU $c_3,$at 1290ec07fdf1Sdjm mflo $t_1 1291ec07fdf1Sdjm mfhi $t_2 1292ec07fdf1Sdjm $ADDU $c_1,$t_1 1293ec07fdf1Sdjm sltu $at,$c_1,$t_1 1294ec07fdf1Sdjm $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3); 1295ec07fdf1Sdjm $ADDU $t_2,$at 1296ec07fdf1Sdjm $ADDU $c_2,$t_2 1297ec07fdf1Sdjm sltu $at,$c_2,$t_2 1298ec07fdf1Sdjm $ADDU $c_3,$at 1299ec07fdf1Sdjm mflo $t_1 1300ec07fdf1Sdjm mfhi $t_2 1301ec07fdf1Sdjm $ADDU $c_1,$t_1 1302ec07fdf1Sdjm sltu $at,$c_1,$t_1 1303ec07fdf1Sdjm $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3); 1304ec07fdf1Sdjm $ADDU $t_2,$at 1305ec07fdf1Sdjm $ADDU $c_2,$t_2 1306ec07fdf1Sdjm sltu $at,$c_2,$t_2 1307ec07fdf1Sdjm $ADDU $c_3,$at 1308ec07fdf1Sdjm mflo $t_1 1309ec07fdf1Sdjm mfhi $t_2 1310ec07fdf1Sdjm $ADDU $c_1,$t_1 1311ec07fdf1Sdjm sltu $at,$c_1,$t_1 1312ec07fdf1Sdjm $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3); 1313ec07fdf1Sdjm $ADDU $t_2,$at 1314ec07fdf1Sdjm $ADDU $c_2,$t_2 1315ec07fdf1Sdjm sltu $at,$c_2,$t_2 
1316ec07fdf1Sdjm $ADDU $c_3,$at 1317ec07fdf1Sdjm mflo $t_1 1318ec07fdf1Sdjm mfhi $t_2 1319ec07fdf1Sdjm $ADDU $c_1,$t_1 1320ec07fdf1Sdjm sltu $at,$c_1,$t_1 1321ec07fdf1Sdjm $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1); 1322ec07fdf1Sdjm $ADDU $t_2,$at 1323ec07fdf1Sdjm $ADDU $c_2,$t_2 1324ec07fdf1Sdjm sltu $at,$c_2,$t_2 1325ec07fdf1Sdjm $ADDU $c_3,$at 1326ec07fdf1Sdjm $ST $c_1,6*$BNSZ($a0) # r[6]=c1; 1327ec07fdf1Sdjm 1328ec07fdf1Sdjm mflo $t_1 1329ec07fdf1Sdjm mfhi $t_2 1330ec07fdf1Sdjm $ADDU $c_2,$t_1 1331ec07fdf1Sdjm sltu $at,$c_2,$t_1 1332ec07fdf1Sdjm $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1); 1333ec07fdf1Sdjm $ADDU $t_2,$at 1334ec07fdf1Sdjm $ADDU $c_3,$t_2 1335ec07fdf1Sdjm sltu $c_1,$c_3,$t_2 1336ec07fdf1Sdjm mflo $t_1 1337ec07fdf1Sdjm mfhi $t_2 1338ec07fdf1Sdjm $ADDU $c_2,$t_1 1339ec07fdf1Sdjm sltu $at,$c_2,$t_1 1340ec07fdf1Sdjm $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1); 1341ec07fdf1Sdjm $ADDU $t_2,$at 1342ec07fdf1Sdjm $ADDU $c_3,$t_2 1343ec07fdf1Sdjm sltu $at,$c_3,$t_2 1344ec07fdf1Sdjm $ADDU $c_1,$at 1345ec07fdf1Sdjm mflo $t_1 1346ec07fdf1Sdjm mfhi $t_2 1347ec07fdf1Sdjm $ADDU $c_2,$t_1 1348ec07fdf1Sdjm sltu $at,$c_2,$t_1 1349ec07fdf1Sdjm $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1); 1350ec07fdf1Sdjm $ADDU $t_2,$at 1351ec07fdf1Sdjm $ADDU $c_3,$t_2 1352ec07fdf1Sdjm sltu $at,$c_3,$t_2 1353ec07fdf1Sdjm $ADDU $c_1,$at 1354ec07fdf1Sdjm mflo $t_1 1355ec07fdf1Sdjm mfhi $t_2 1356ec07fdf1Sdjm $ADDU $c_2,$t_1 1357ec07fdf1Sdjm sltu $at,$c_2,$t_1 1358ec07fdf1Sdjm $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1); 1359ec07fdf1Sdjm $ADDU $t_2,$at 1360ec07fdf1Sdjm $ADDU $c_3,$t_2 1361ec07fdf1Sdjm sltu $at,$c_3,$t_2 1362ec07fdf1Sdjm $ADDU $c_1,$at 1363ec07fdf1Sdjm mflo $t_1 1364ec07fdf1Sdjm mfhi $t_2 1365ec07fdf1Sdjm $ADDU $c_2,$t_1 1366ec07fdf1Sdjm sltu $at,$c_2,$t_1 1367ec07fdf1Sdjm $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1); 1368ec07fdf1Sdjm $ADDU $t_2,$at 1369ec07fdf1Sdjm $ADDU $c_3,$t_2 1370ec07fdf1Sdjm sltu $at,$c_3,$t_2 1371ec07fdf1Sdjm $ADDU 
$c_1,$at 1372ec07fdf1Sdjm mflo $t_1 1373ec07fdf1Sdjm mfhi $t_2 1374ec07fdf1Sdjm $ADDU $c_2,$t_1 1375ec07fdf1Sdjm sltu $at,$c_2,$t_1 1376ec07fdf1Sdjm $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1); 1377ec07fdf1Sdjm $ADDU $t_2,$at 1378ec07fdf1Sdjm $ADDU $c_3,$t_2 1379ec07fdf1Sdjm sltu $at,$c_3,$t_2 1380ec07fdf1Sdjm $ADDU $c_1,$at 1381ec07fdf1Sdjm mflo $t_1 1382ec07fdf1Sdjm mfhi $t_2 1383ec07fdf1Sdjm $ADDU $c_2,$t_1 1384ec07fdf1Sdjm sltu $at,$c_2,$t_1 1385ec07fdf1Sdjm $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1); 1386ec07fdf1Sdjm $ADDU $t_2,$at 1387ec07fdf1Sdjm $ADDU $c_3,$t_2 1388ec07fdf1Sdjm sltu $at,$c_3,$t_2 1389ec07fdf1Sdjm $ADDU $c_1,$at 1390ec07fdf1Sdjm mflo $t_1 1391ec07fdf1Sdjm mfhi $t_2 1392ec07fdf1Sdjm $ADDU $c_2,$t_1 1393ec07fdf1Sdjm sltu $at,$c_2,$t_1 1394ec07fdf1Sdjm $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2); 1395ec07fdf1Sdjm $ADDU $t_2,$at 1396ec07fdf1Sdjm $ADDU $c_3,$t_2 1397ec07fdf1Sdjm sltu $at,$c_3,$t_2 1398ec07fdf1Sdjm $ADDU $c_1,$at 1399ec07fdf1Sdjm $ST $c_2,7*$BNSZ($a0) # r[7]=c2; 1400ec07fdf1Sdjm 1401ec07fdf1Sdjm mflo $t_1 1402ec07fdf1Sdjm mfhi $t_2 1403ec07fdf1Sdjm $ADDU $c_3,$t_1 1404ec07fdf1Sdjm sltu $at,$c_3,$t_1 1405ec07fdf1Sdjm $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2); 1406ec07fdf1Sdjm $ADDU $t_2,$at 1407ec07fdf1Sdjm $ADDU $c_1,$t_2 1408ec07fdf1Sdjm sltu $c_2,$c_1,$t_2 1409ec07fdf1Sdjm mflo $t_1 1410ec07fdf1Sdjm mfhi $t_2 1411ec07fdf1Sdjm $ADDU $c_3,$t_1 1412ec07fdf1Sdjm sltu $at,$c_3,$t_1 1413ec07fdf1Sdjm $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2); 1414ec07fdf1Sdjm $ADDU $t_2,$at 1415ec07fdf1Sdjm $ADDU $c_1,$t_2 1416ec07fdf1Sdjm sltu $at,$c_1,$t_2 1417ec07fdf1Sdjm $ADDU $c_2,$at 1418ec07fdf1Sdjm mflo $t_1 1419ec07fdf1Sdjm mfhi $t_2 1420ec07fdf1Sdjm $ADDU $c_3,$t_1 1421ec07fdf1Sdjm sltu $at,$c_3,$t_1 1422ec07fdf1Sdjm $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2); 1423ec07fdf1Sdjm $ADDU $t_2,$at 1424ec07fdf1Sdjm $ADDU $c_1,$t_2 1425ec07fdf1Sdjm sltu $at,$c_1,$t_2 1426ec07fdf1Sdjm $ADDU $c_2,$at 1427ec07fdf1Sdjm 
mflo $t_1 1428ec07fdf1Sdjm mfhi $t_2 1429ec07fdf1Sdjm $ADDU $c_3,$t_1 1430ec07fdf1Sdjm sltu $at,$c_3,$t_1 1431ec07fdf1Sdjm $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2); 1432ec07fdf1Sdjm $ADDU $t_2,$at 1433ec07fdf1Sdjm $ADDU $c_1,$t_2 1434ec07fdf1Sdjm sltu $at,$c_1,$t_2 1435ec07fdf1Sdjm $ADDU $c_2,$at 1436ec07fdf1Sdjm mflo $t_1 1437ec07fdf1Sdjm mfhi $t_2 1438ec07fdf1Sdjm $ADDU $c_3,$t_1 1439ec07fdf1Sdjm sltu $at,$c_3,$t_1 1440ec07fdf1Sdjm $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2); 1441ec07fdf1Sdjm $ADDU $t_2,$at 1442ec07fdf1Sdjm $ADDU $c_1,$t_2 1443ec07fdf1Sdjm sltu $at,$c_1,$t_2 1444ec07fdf1Sdjm $ADDU $c_2,$at 1445ec07fdf1Sdjm mflo $t_1 1446ec07fdf1Sdjm mfhi $t_2 1447ec07fdf1Sdjm $ADDU $c_3,$t_1 1448ec07fdf1Sdjm sltu $at,$c_3,$t_1 1449ec07fdf1Sdjm $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2); 1450ec07fdf1Sdjm $ADDU $t_2,$at 1451ec07fdf1Sdjm $ADDU $c_1,$t_2 1452ec07fdf1Sdjm sltu $at,$c_1,$t_2 1453ec07fdf1Sdjm $ADDU $c_2,$at 1454ec07fdf1Sdjm mflo $t_1 1455ec07fdf1Sdjm mfhi $t_2 1456ec07fdf1Sdjm $ADDU $c_3,$t_1 1457ec07fdf1Sdjm sltu $at,$c_3,$t_1 1458ec07fdf1Sdjm $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3); 1459ec07fdf1Sdjm $ADDU $t_2,$at 1460ec07fdf1Sdjm $ADDU $c_1,$t_2 1461ec07fdf1Sdjm sltu $at,$c_1,$t_2 1462ec07fdf1Sdjm $ADDU $c_2,$at 1463ec07fdf1Sdjm $ST $c_3,8*$BNSZ($a0) # r[8]=c3; 1464ec07fdf1Sdjm 1465ec07fdf1Sdjm mflo $t_1 1466ec07fdf1Sdjm mfhi $t_2 1467ec07fdf1Sdjm $ADDU $c_1,$t_1 1468ec07fdf1Sdjm sltu $at,$c_1,$t_1 1469ec07fdf1Sdjm $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3); 1470ec07fdf1Sdjm $ADDU $t_2,$at 1471ec07fdf1Sdjm $ADDU $c_2,$t_2 1472ec07fdf1Sdjm sltu $c_3,$c_2,$t_2 1473ec07fdf1Sdjm mflo $t_1 1474ec07fdf1Sdjm mfhi $t_2 1475ec07fdf1Sdjm $ADDU $c_1,$t_1 1476ec07fdf1Sdjm sltu $at,$c_1,$t_1 1477ec07fdf1Sdjm $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3); 1478ec07fdf1Sdjm $ADDU $t_2,$at 1479ec07fdf1Sdjm $ADDU $c_2,$t_2 1480ec07fdf1Sdjm sltu $at,$c_2,$t_2 1481ec07fdf1Sdjm $ADDU $c_3,$at 1482ec07fdf1Sdjm mflo $t_1 
1483ec07fdf1Sdjm mfhi $t_2 1484ec07fdf1Sdjm $ADDU $c_1,$t_1 1485ec07fdf1Sdjm sltu $at,$c_1,$t_1 1486ec07fdf1Sdjm $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3); 1487ec07fdf1Sdjm $ADDU $t_2,$at 1488ec07fdf1Sdjm $ADDU $c_2,$t_2 1489ec07fdf1Sdjm sltu $at,$c_2,$t_2 1490ec07fdf1Sdjm $ADDU $c_3,$at 1491ec07fdf1Sdjm mflo $t_1 1492ec07fdf1Sdjm mfhi $t_2 1493ec07fdf1Sdjm $ADDU $c_1,$t_1 1494ec07fdf1Sdjm sltu $at,$c_1,$t_1 1495ec07fdf1Sdjm $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3); 1496ec07fdf1Sdjm $ADDU $t_2,$at 1497ec07fdf1Sdjm $ADDU $c_2,$t_2 1498ec07fdf1Sdjm sltu $at,$c_2,$t_2 1499ec07fdf1Sdjm $ADDU $c_3,$at 1500ec07fdf1Sdjm mflo $t_1 1501ec07fdf1Sdjm mfhi $t_2 1502ec07fdf1Sdjm $ADDU $c_1,$t_1 1503ec07fdf1Sdjm sltu $at,$c_1,$t_1 1504ec07fdf1Sdjm $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3); 1505ec07fdf1Sdjm $ADDU $t_2,$at 1506ec07fdf1Sdjm $ADDU $c_2,$t_2 1507ec07fdf1Sdjm sltu $at,$c_2,$t_2 1508ec07fdf1Sdjm $ADDU $c_3,$at 1509ec07fdf1Sdjm mflo $t_1 1510ec07fdf1Sdjm mfhi $t_2 1511ec07fdf1Sdjm $ADDU $c_1,$t_1 1512ec07fdf1Sdjm sltu $at,$c_1,$t_1 1513ec07fdf1Sdjm $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1); 1514ec07fdf1Sdjm $ADDU $t_2,$at 1515ec07fdf1Sdjm $ADDU $c_2,$t_2 1516ec07fdf1Sdjm sltu $at,$c_2,$t_2 1517ec07fdf1Sdjm $ADDU $c_3,$at 1518ec07fdf1Sdjm $ST $c_1,9*$BNSZ($a0) # r[9]=c1; 1519ec07fdf1Sdjm 1520ec07fdf1Sdjm mflo $t_1 1521ec07fdf1Sdjm mfhi $t_2 1522ec07fdf1Sdjm $ADDU $c_2,$t_1 1523ec07fdf1Sdjm sltu $at,$c_2,$t_1 1524ec07fdf1Sdjm $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1); 1525ec07fdf1Sdjm $ADDU $t_2,$at 1526ec07fdf1Sdjm $ADDU $c_3,$t_2 1527ec07fdf1Sdjm sltu $c_1,$c_3,$t_2 1528ec07fdf1Sdjm mflo $t_1 1529ec07fdf1Sdjm mfhi $t_2 1530ec07fdf1Sdjm $ADDU $c_2,$t_1 1531ec07fdf1Sdjm sltu $at,$c_2,$t_1 1532ec07fdf1Sdjm $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1); 1533ec07fdf1Sdjm $ADDU $t_2,$at 1534ec07fdf1Sdjm $ADDU $c_3,$t_2 1535ec07fdf1Sdjm sltu $at,$c_3,$t_2 1536ec07fdf1Sdjm $ADDU $c_1,$at 1537ec07fdf1Sdjm mflo $t_1 1538ec07fdf1Sdjm mfhi 
$t_2 1539ec07fdf1Sdjm $ADDU $c_2,$t_1 1540ec07fdf1Sdjm sltu $at,$c_2,$t_1 1541ec07fdf1Sdjm $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1); 1542ec07fdf1Sdjm $ADDU $t_2,$at 1543ec07fdf1Sdjm $ADDU $c_3,$t_2 1544ec07fdf1Sdjm sltu $at,$c_3,$t_2 1545ec07fdf1Sdjm $ADDU $c_1,$at 1546ec07fdf1Sdjm mflo $t_1 1547ec07fdf1Sdjm mfhi $t_2 1548ec07fdf1Sdjm $ADDU $c_2,$t_1 1549ec07fdf1Sdjm sltu $at,$c_2,$t_1 1550ec07fdf1Sdjm $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1); 1551ec07fdf1Sdjm $ADDU $t_2,$at 1552ec07fdf1Sdjm $ADDU $c_3,$t_2 1553ec07fdf1Sdjm sltu $at,$c_3,$t_2 1554ec07fdf1Sdjm $ADDU $c_1,$at 1555ec07fdf1Sdjm mflo $t_1 1556ec07fdf1Sdjm mfhi $t_2 1557ec07fdf1Sdjm $ADDU $c_2,$t_1 1558ec07fdf1Sdjm sltu $at,$c_2,$t_1 1559ec07fdf1Sdjm $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2); 1560ec07fdf1Sdjm $ADDU $t_2,$at 1561ec07fdf1Sdjm $ADDU $c_3,$t_2 1562ec07fdf1Sdjm sltu $at,$c_3,$t_2 1563ec07fdf1Sdjm $ADDU $c_1,$at 1564ec07fdf1Sdjm $ST $c_2,10*$BNSZ($a0) # r[10]=c2; 1565ec07fdf1Sdjm 1566ec07fdf1Sdjm mflo $t_1 1567ec07fdf1Sdjm mfhi $t_2 1568ec07fdf1Sdjm $ADDU $c_3,$t_1 1569ec07fdf1Sdjm sltu $at,$c_3,$t_1 1570ec07fdf1Sdjm $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2); 1571ec07fdf1Sdjm $ADDU $t_2,$at 1572ec07fdf1Sdjm $ADDU $c_1,$t_2 1573ec07fdf1Sdjm sltu $c_2,$c_1,$t_2 1574ec07fdf1Sdjm mflo $t_1 1575ec07fdf1Sdjm mfhi $t_2 1576ec07fdf1Sdjm $ADDU $c_3,$t_1 1577ec07fdf1Sdjm sltu $at,$c_3,$t_1 1578ec07fdf1Sdjm $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2); 1579ec07fdf1Sdjm $ADDU $t_2,$at 1580ec07fdf1Sdjm $ADDU $c_1,$t_2 1581ec07fdf1Sdjm sltu $at,$c_1,$t_2 1582ec07fdf1Sdjm $ADDU $c_2,$at 1583ec07fdf1Sdjm mflo $t_1 1584ec07fdf1Sdjm mfhi $t_2 1585ec07fdf1Sdjm $ADDU $c_3,$t_1 1586ec07fdf1Sdjm sltu $at,$c_3,$t_1 1587ec07fdf1Sdjm $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2); 1588ec07fdf1Sdjm $ADDU $t_2,$at 1589ec07fdf1Sdjm $ADDU $c_1,$t_2 1590ec07fdf1Sdjm sltu $at,$c_1,$t_2 1591ec07fdf1Sdjm $ADDU $c_2,$at 1592ec07fdf1Sdjm mflo $t_1 1593ec07fdf1Sdjm mfhi $t_2 1594ec07fdf1Sdjm 
$ADDU $c_3,$t_1 1595ec07fdf1Sdjm sltu $at,$c_3,$t_1 1596ec07fdf1Sdjm $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3); 1597ec07fdf1Sdjm $ADDU $t_2,$at 1598ec07fdf1Sdjm $ADDU $c_1,$t_2 1599ec07fdf1Sdjm sltu $at,$c_1,$t_2 1600ec07fdf1Sdjm $ADDU $c_2,$at 1601ec07fdf1Sdjm $ST $c_3,11*$BNSZ($a0) # r[11]=c3; 1602ec07fdf1Sdjm 1603ec07fdf1Sdjm mflo $t_1 1604ec07fdf1Sdjm mfhi $t_2 1605ec07fdf1Sdjm $ADDU $c_1,$t_1 1606ec07fdf1Sdjm sltu $at,$c_1,$t_1 1607ec07fdf1Sdjm $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3); 1608ec07fdf1Sdjm $ADDU $t_2,$at 1609ec07fdf1Sdjm $ADDU $c_2,$t_2 1610ec07fdf1Sdjm sltu $c_3,$c_2,$t_2 1611ec07fdf1Sdjm mflo $t_1 1612ec07fdf1Sdjm mfhi $t_2 1613ec07fdf1Sdjm $ADDU $c_1,$t_1 1614ec07fdf1Sdjm sltu $at,$c_1,$t_1 1615ec07fdf1Sdjm $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3); 1616ec07fdf1Sdjm $ADDU $t_2,$at 1617ec07fdf1Sdjm $ADDU $c_2,$t_2 1618ec07fdf1Sdjm sltu $at,$c_2,$t_2 1619ec07fdf1Sdjm $ADDU $c_3,$at 1620ec07fdf1Sdjm mflo $t_1 1621ec07fdf1Sdjm mfhi $t_2 1622ec07fdf1Sdjm $ADDU $c_1,$t_1 1623ec07fdf1Sdjm sltu $at,$c_1,$t_1 1624ec07fdf1Sdjm $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1); 1625ec07fdf1Sdjm $ADDU $t_2,$at 1626ec07fdf1Sdjm $ADDU $c_2,$t_2 1627ec07fdf1Sdjm sltu $at,$c_2,$t_2 1628ec07fdf1Sdjm $ADDU $c_3,$at 1629ec07fdf1Sdjm $ST $c_1,12*$BNSZ($a0) # r[12]=c1; 1630ec07fdf1Sdjm 1631ec07fdf1Sdjm mflo $t_1 1632ec07fdf1Sdjm mfhi $t_2 1633ec07fdf1Sdjm $ADDU $c_2,$t_1 1634ec07fdf1Sdjm sltu $at,$c_2,$t_1 1635ec07fdf1Sdjm $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1); 1636ec07fdf1Sdjm $ADDU $t_2,$at 1637ec07fdf1Sdjm $ADDU $c_3,$t_2 1638ec07fdf1Sdjm sltu $c_1,$c_3,$t_2 1639ec07fdf1Sdjm mflo $t_1 1640ec07fdf1Sdjm mfhi $t_2 1641ec07fdf1Sdjm $ADDU $c_2,$t_1 1642ec07fdf1Sdjm sltu $at,$c_2,$t_1 1643ec07fdf1Sdjm $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2); 1644ec07fdf1Sdjm $ADDU $t_2,$at 1645ec07fdf1Sdjm $ADDU $c_3,$t_2 1646ec07fdf1Sdjm sltu $at,$c_3,$t_2 1647ec07fdf1Sdjm $ADDU $c_1,$at 1648ec07fdf1Sdjm $ST $c_2,13*$BNSZ($a0) # r[13]=c2; 
1649ec07fdf1Sdjm 1650ec07fdf1Sdjm mflo $t_1 1651ec07fdf1Sdjm mfhi $t_2 1652ec07fdf1Sdjm $ADDU $c_3,$t_1 1653ec07fdf1Sdjm sltu $at,$c_3,$t_1 1654ec07fdf1Sdjm $ADDU $t_2,$at 1655ec07fdf1Sdjm $ADDU $c_1,$t_2 1656ec07fdf1Sdjm $ST $c_3,14*$BNSZ($a0) # r[14]=c3; 1657ec07fdf1Sdjm $ST $c_1,15*$BNSZ($a0) # r[15]=c1; 1658ec07fdf1Sdjm 1659ec07fdf1Sdjm .set noreorder 1660ec07fdf1Sdjm___ 1661ec07fdf1Sdjm$code.=<<___ if ($flavour =~ /nubi/i); 1662ec07fdf1Sdjm $REG_L $s5,10*$SZREG($sp) 1663ec07fdf1Sdjm $REG_L $s4,9*$SZREG($sp) 1664ec07fdf1Sdjm $REG_L $s3,8*$SZREG($sp) 1665ec07fdf1Sdjm $REG_L $s2,7*$SZREG($sp) 1666ec07fdf1Sdjm $REG_L $s1,6*$SZREG($sp) 1667ec07fdf1Sdjm $REG_L $s0,5*$SZREG($sp) 1668ec07fdf1Sdjm $REG_L $t3,4*$SZREG($sp) 1669ec07fdf1Sdjm $REG_L $t2,3*$SZREG($sp) 1670ec07fdf1Sdjm $REG_L $t1,2*$SZREG($sp) 1671ec07fdf1Sdjm $REG_L $t0,1*$SZREG($sp) 1672ec07fdf1Sdjm $REG_L $gp,0*$SZREG($sp) 1673ec07fdf1Sdjm jr $ra 1674ec07fdf1Sdjm $PTR_ADD $sp,12*$SZREG 1675ec07fdf1Sdjm___ 1676ec07fdf1Sdjm$code.=<<___ if ($flavour !~ /nubi/i); 1677ec07fdf1Sdjm $REG_L $s5,5*$SZREG($sp) 1678ec07fdf1Sdjm $REG_L $s4,4*$SZREG($sp) 1679ec07fdf1Sdjm $REG_L $s3,3*$SZREG($sp) 1680ec07fdf1Sdjm $REG_L $s2,2*$SZREG($sp) 1681ec07fdf1Sdjm $REG_L $s1,1*$SZREG($sp) 1682ec07fdf1Sdjm $REG_L $s0,0*$SZREG($sp) 1683ec07fdf1Sdjm jr $ra 1684ec07fdf1Sdjm $PTR_ADD $sp,6*$SZREG 1685ec07fdf1Sdjm___ 1686ec07fdf1Sdjm$code.=<<___; 1687ec07fdf1Sdjm.end bn_mul_comba8 1688ec07fdf1Sdjm 1689ec07fdf1Sdjm.align 5 1690ec07fdf1Sdjm.globl bn_mul_comba4 1691ec07fdf1Sdjm.ent bn_mul_comba4 1692ec07fdf1Sdjmbn_mul_comba4: 1693ec07fdf1Sdjm___ 1694ec07fdf1Sdjm$code.=<<___ if ($flavour =~ /nubi/i); 1695ec07fdf1Sdjm .frame $sp,6*$SZREG,$ra 1696ec07fdf1Sdjm .mask 0x8000f008,-$SZREG 1697ec07fdf1Sdjm .set noreorder 1698ec07fdf1Sdjm $PTR_SUB $sp,6*$SZREG 1699ec07fdf1Sdjm $REG_S $ra,5*$SZREG($sp) 1700ec07fdf1Sdjm $REG_S $t3,4*$SZREG($sp) 1701ec07fdf1Sdjm $REG_S $t2,3*$SZREG($sp) 1702ec07fdf1Sdjm $REG_S $t1,2*$SZREG($sp) 1703ec07fdf1Sdjm 
$REG_S $t0,1*$SZREG($sp) 1704ec07fdf1Sdjm $REG_S $gp,0*$SZREG($sp) 1705ec07fdf1Sdjm___ 1706ec07fdf1Sdjm$code.=<<___; 1707ec07fdf1Sdjm .set reorder 1708ec07fdf1Sdjm $LD $a_0,0($a1) 1709ec07fdf1Sdjm $LD $b_0,0($a2) 1710ec07fdf1Sdjm $LD $a_1,$BNSZ($a1) 1711ec07fdf1Sdjm $LD $a_2,2*$BNSZ($a1) 1712ec07fdf1Sdjm $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); 1713ec07fdf1Sdjm $LD $a_3,3*$BNSZ($a1) 1714ec07fdf1Sdjm $LD $b_1,$BNSZ($a2) 1715ec07fdf1Sdjm $LD $b_2,2*$BNSZ($a2) 1716ec07fdf1Sdjm $LD $b_3,3*$BNSZ($a2) 1717ec07fdf1Sdjm mflo $c_1 1718ec07fdf1Sdjm mfhi $c_2 1719ec07fdf1Sdjm $ST $c_1,0($a0) 1720ec07fdf1Sdjm 1721ec07fdf1Sdjm $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); 1722ec07fdf1Sdjm mflo $t_1 1723ec07fdf1Sdjm mfhi $t_2 1724ec07fdf1Sdjm $ADDU $c_2,$t_1 1725ec07fdf1Sdjm sltu $at,$c_2,$t_1 1726ec07fdf1Sdjm $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); 1727ec07fdf1Sdjm $ADDU $c_3,$t_2,$at 1728ec07fdf1Sdjm mflo $t_1 1729ec07fdf1Sdjm mfhi $t_2 1730ec07fdf1Sdjm $ADDU $c_2,$t_1 1731ec07fdf1Sdjm sltu $at,$c_2,$t_1 1732ec07fdf1Sdjm $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); 1733ec07fdf1Sdjm $ADDU $t_2,$at 1734ec07fdf1Sdjm $ADDU $c_3,$t_2 1735ec07fdf1Sdjm sltu $c_1,$c_3,$t_2 1736ec07fdf1Sdjm $ST $c_2,$BNSZ($a0) 1737ec07fdf1Sdjm 1738ec07fdf1Sdjm mflo $t_1 1739ec07fdf1Sdjm mfhi $t_2 1740ec07fdf1Sdjm $ADDU $c_3,$t_1 1741ec07fdf1Sdjm sltu $at,$c_3,$t_1 1742ec07fdf1Sdjm $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); 1743ec07fdf1Sdjm $ADDU $t_2,$at 1744ec07fdf1Sdjm $ADDU $c_1,$t_2 1745ec07fdf1Sdjm mflo $t_1 1746ec07fdf1Sdjm mfhi $t_2 1747ec07fdf1Sdjm $ADDU $c_3,$t_1 1748ec07fdf1Sdjm sltu $at,$c_3,$t_1 1749ec07fdf1Sdjm $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); 1750ec07fdf1Sdjm $ADDU $t_2,$at 1751ec07fdf1Sdjm $ADDU $c_1,$t_2 1752ec07fdf1Sdjm sltu $c_2,$c_1,$t_2 1753ec07fdf1Sdjm mflo $t_1 1754ec07fdf1Sdjm mfhi $t_2 1755ec07fdf1Sdjm $ADDU $c_3,$t_1 1756ec07fdf1Sdjm sltu $at,$c_3,$t_1 1757ec07fdf1Sdjm $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); 
1758ec07fdf1Sdjm $ADDU $t_2,$at 1759ec07fdf1Sdjm $ADDU $c_1,$t_2 1760ec07fdf1Sdjm sltu $at,$c_1,$t_2 1761ec07fdf1Sdjm $ADDU $c_2,$at 1762ec07fdf1Sdjm $ST $c_3,2*$BNSZ($a0) 1763ec07fdf1Sdjm 1764ec07fdf1Sdjm mflo $t_1 1765ec07fdf1Sdjm mfhi $t_2 1766ec07fdf1Sdjm $ADDU $c_1,$t_1 1767ec07fdf1Sdjm sltu $at,$c_1,$t_1 1768ec07fdf1Sdjm $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); 1769ec07fdf1Sdjm $ADDU $t_2,$at 1770ec07fdf1Sdjm $ADDU $c_2,$t_2 1771ec07fdf1Sdjm sltu $c_3,$c_2,$t_2 1772ec07fdf1Sdjm mflo $t_1 1773ec07fdf1Sdjm mfhi $t_2 1774ec07fdf1Sdjm $ADDU $c_1,$t_1 1775ec07fdf1Sdjm sltu $at,$c_1,$t_1 1776ec07fdf1Sdjm $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); 1777ec07fdf1Sdjm $ADDU $t_2,$at 1778ec07fdf1Sdjm $ADDU $c_2,$t_2 1779ec07fdf1Sdjm sltu $at,$c_2,$t_2 1780ec07fdf1Sdjm $ADDU $c_3,$at 1781ec07fdf1Sdjm mflo $t_1 1782ec07fdf1Sdjm mfhi $t_2 1783ec07fdf1Sdjm $ADDU $c_1,$t_1 1784ec07fdf1Sdjm sltu $at,$c_1,$t_1 1785ec07fdf1Sdjm $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); 1786ec07fdf1Sdjm $ADDU $t_2,$at 1787ec07fdf1Sdjm $ADDU $c_2,$t_2 1788ec07fdf1Sdjm sltu $at,$c_2,$t_2 1789ec07fdf1Sdjm $ADDU $c_3,$at 1790ec07fdf1Sdjm mflo $t_1 1791ec07fdf1Sdjm mfhi $t_2 1792ec07fdf1Sdjm $ADDU $c_1,$t_1 1793ec07fdf1Sdjm sltu $at,$c_1,$t_1 1794ec07fdf1Sdjm $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); 1795ec07fdf1Sdjm $ADDU $t_2,$at 1796ec07fdf1Sdjm $ADDU $c_2,$t_2 1797ec07fdf1Sdjm sltu $at,$c_2,$t_2 1798ec07fdf1Sdjm $ADDU $c_3,$at 1799ec07fdf1Sdjm $ST $c_1,3*$BNSZ($a0) 1800ec07fdf1Sdjm 1801ec07fdf1Sdjm mflo $t_1 1802ec07fdf1Sdjm mfhi $t_2 1803ec07fdf1Sdjm $ADDU $c_2,$t_1 1804ec07fdf1Sdjm sltu $at,$c_2,$t_1 1805ec07fdf1Sdjm $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); 1806ec07fdf1Sdjm $ADDU $t_2,$at 1807ec07fdf1Sdjm $ADDU $c_3,$t_2 1808ec07fdf1Sdjm sltu $c_1,$c_3,$t_2 1809ec07fdf1Sdjm mflo $t_1 1810ec07fdf1Sdjm mfhi $t_2 1811ec07fdf1Sdjm $ADDU $c_2,$t_1 1812ec07fdf1Sdjm sltu $at,$c_2,$t_1 1813ec07fdf1Sdjm $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); 
1814ec07fdf1Sdjm $ADDU $t_2,$at 1815ec07fdf1Sdjm $ADDU $c_3,$t_2 1816ec07fdf1Sdjm sltu $at,$c_3,$t_2 1817ec07fdf1Sdjm $ADDU $c_1,$at 1818ec07fdf1Sdjm mflo $t_1 1819ec07fdf1Sdjm mfhi $t_2 1820ec07fdf1Sdjm $ADDU $c_2,$t_1 1821ec07fdf1Sdjm sltu $at,$c_2,$t_1 1822ec07fdf1Sdjm $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); 1823ec07fdf1Sdjm $ADDU $t_2,$at 1824ec07fdf1Sdjm $ADDU $c_3,$t_2 1825ec07fdf1Sdjm sltu $at,$c_3,$t_2 1826ec07fdf1Sdjm $ADDU $c_1,$at 1827ec07fdf1Sdjm $ST $c_2,4*$BNSZ($a0) 1828ec07fdf1Sdjm 1829ec07fdf1Sdjm mflo $t_1 1830ec07fdf1Sdjm mfhi $t_2 1831ec07fdf1Sdjm $ADDU $c_3,$t_1 1832ec07fdf1Sdjm sltu $at,$c_3,$t_1 1833ec07fdf1Sdjm $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); 1834ec07fdf1Sdjm $ADDU $t_2,$at 1835ec07fdf1Sdjm $ADDU $c_1,$t_2 1836ec07fdf1Sdjm sltu $c_2,$c_1,$t_2 1837ec07fdf1Sdjm mflo $t_1 1838ec07fdf1Sdjm mfhi $t_2 1839ec07fdf1Sdjm $ADDU $c_3,$t_1 1840ec07fdf1Sdjm sltu $at,$c_3,$t_1 1841ec07fdf1Sdjm $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); 1842ec07fdf1Sdjm $ADDU $t_2,$at 1843ec07fdf1Sdjm $ADDU $c_1,$t_2 1844ec07fdf1Sdjm sltu $at,$c_1,$t_2 1845ec07fdf1Sdjm $ADDU $c_2,$at 1846ec07fdf1Sdjm $ST $c_3,5*$BNSZ($a0) 1847ec07fdf1Sdjm 1848ec07fdf1Sdjm mflo $t_1 1849ec07fdf1Sdjm mfhi $t_2 1850ec07fdf1Sdjm $ADDU $c_1,$t_1 1851ec07fdf1Sdjm sltu $at,$c_1,$t_1 1852ec07fdf1Sdjm $ADDU $t_2,$at 1853ec07fdf1Sdjm $ADDU $c_2,$t_2 1854ec07fdf1Sdjm $ST $c_1,6*$BNSZ($a0) 1855ec07fdf1Sdjm $ST $c_2,7*$BNSZ($a0) 1856ec07fdf1Sdjm 1857ec07fdf1Sdjm .set noreorder 1858ec07fdf1Sdjm___ 1859ec07fdf1Sdjm$code.=<<___ if ($flavour =~ /nubi/i); 1860ec07fdf1Sdjm $REG_L $t3,4*$SZREG($sp) 1861ec07fdf1Sdjm $REG_L $t2,3*$SZREG($sp) 1862ec07fdf1Sdjm $REG_L $t1,2*$SZREG($sp) 1863ec07fdf1Sdjm $REG_L $t0,1*$SZREG($sp) 1864ec07fdf1Sdjm $REG_L $gp,0*$SZREG($sp) 1865ec07fdf1Sdjm $PTR_ADD $sp,6*$SZREG 1866ec07fdf1Sdjm___ 1867ec07fdf1Sdjm$code.=<<___; 1868ec07fdf1Sdjm jr $ra 1869ec07fdf1Sdjm nop 1870ec07fdf1Sdjm.end bn_mul_comba4 1871ec07fdf1Sdjm___ 1872ec07fdf1Sdjm 
# bn_sqr_comba8 below has no b[] operand, so a[4..7] take over the
# registers that held b[0..3] in the multiplication routines.
($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3);

# add_c2 - append to $code the instruction sequence for one
# mul_add_c2() step of bn_sqr_comba8: fetch the double-word product
# left in HI/LO by the previous multiply, add it *twice* to the
# three-word accumulator ($c2:$c1:$c0), and issue the multiplication
# whose result the following step will consume.
#
# Arguments (all except $warm are register names interpolated into the
# generated text):
#   $hi,$lo      registers that receive HI and LO; both are clobbered
#                by the generated code
#   $c0,$c1,$c2  accumulator words, least significant first
#   $warm        false on the first call for a given $c0/$c1/$c2
#                assignment, when $c2 holds no carry yet: $c2 is then
#                overwritten with the carry instead of being added to
#   $an,$bn      operands of the "forward" multiplication, i.e. the
#                one whose product is used by the *next* step
#
# Besides @_ the sub reads the file-level $ADDU, $MULTU and $at; the
# generated code uses $at as a scratch register.
#
# No prototype is declared: the sub takes eight arguments, so an empty
# prototype would turn any plain add_c2(...) call into a compile-time
# "Too many arguments" error. Existing &add_c2(...) calls are unaffected.
sub add_c2 {
my ($hi,$lo,$c0,$c1,$c2,
    $warm,
    $an,$bn
    )=@_;

# Common part: add lo twice to c0, collecting the two carries in $at
# (together with hi) and in $lo; add the first hi+carry to c1 and fold
# the second carry into $hi.
$code.=<<___;
	mflo	$lo
	mfhi	$hi
	$ADDU	$c0,$lo
	sltu	$at,$c0,$lo
	$MULTU	$an,$bn			# forward multiplication
	$ADDU	$c0,$lo
	$ADDU	$at,$hi
	sltu	$lo,$c0,$lo
	$ADDU	$c1,$at
	$ADDU	$hi,$lo
___
# First use of this accumulator assignment: the carry out of c1
# initialises c2.
$code.=<<___	if (!$warm);
	sltu	$c2,$c1,$at
	$ADDU	$c1,$hi
	sltu	$hi,$c1,$hi
	$ADDU	$c2,$hi
___
# Subsequent uses: both carries out of c1 are accumulated into c2.
$code.=<<___	if ($warm);
	sltu	$at,$c1,$at
	$ADDU	$c1,$hi
	$ADDU	$c2,$at
	sltu	$hi,$c1,$hi
	$ADDU	$c2,$hi
___
}

1910ec07fdf1Sdjm$code.=<<___; 1911ec07fdf1Sdjm 1912ec07fdf1Sdjm.align 5 1913ec07fdf1Sdjm.globl bn_sqr_comba8 1914ec07fdf1Sdjm.ent bn_sqr_comba8 1915ec07fdf1Sdjmbn_sqr_comba8: 1916ec07fdf1Sdjm___ 1917ec07fdf1Sdjm$code.=<<___ if ($flavour =~ /nubi/i); 1918ec07fdf1Sdjm .frame $sp,6*$SZREG,$ra 1919ec07fdf1Sdjm .mask 0x8000f008,-$SZREG 1920ec07fdf1Sdjm .set noreorder 1921ec07fdf1Sdjm $PTR_SUB $sp,6*$SZREG 1922ec07fdf1Sdjm $REG_S $ra,5*$SZREG($sp) 1923ec07fdf1Sdjm $REG_S $t3,4*$SZREG($sp) 1924ec07fdf1Sdjm $REG_S $t2,3*$SZREG($sp) 1925ec07fdf1Sdjm $REG_S
$t1,2*$SZREG($sp) 1926ec07fdf1Sdjm $REG_S $t0,1*$SZREG($sp) 1927ec07fdf1Sdjm $REG_S $gp,0*$SZREG($sp) 1928ec07fdf1Sdjm___ 1929ec07fdf1Sdjm$code.=<<___; 1930ec07fdf1Sdjm .set reorder 1931ec07fdf1Sdjm $LD $a_0,0($a1) 1932ec07fdf1Sdjm $LD $a_1,$BNSZ($a1) 1933ec07fdf1Sdjm $LD $a_2,2*$BNSZ($a1) 1934ec07fdf1Sdjm $LD $a_3,3*$BNSZ($a1) 1935ec07fdf1Sdjm 1936ec07fdf1Sdjm $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); 1937ec07fdf1Sdjm $LD $a_4,4*$BNSZ($a1) 1938ec07fdf1Sdjm $LD $a_5,5*$BNSZ($a1) 1939ec07fdf1Sdjm $LD $a_6,6*$BNSZ($a1) 1940ec07fdf1Sdjm $LD $a_7,7*$BNSZ($a1) 1941ec07fdf1Sdjm mflo $c_1 1942ec07fdf1Sdjm mfhi $c_2 1943ec07fdf1Sdjm $ST $c_1,0($a0) 1944ec07fdf1Sdjm 1945ec07fdf1Sdjm $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); 1946ec07fdf1Sdjm mflo $t_1 1947ec07fdf1Sdjm mfhi $t_2 1948ec07fdf1Sdjm slt $c_1,$t_2,$zero 1949ec07fdf1Sdjm $SLL $t_2,1 1950ec07fdf1Sdjm $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); 1951ec07fdf1Sdjm slt $a2,$t_1,$zero 1952ec07fdf1Sdjm $ADDU $t_2,$a2 1953ec07fdf1Sdjm $SLL $t_1,1 1954ec07fdf1Sdjm $ADDU $c_2,$t_1 1955ec07fdf1Sdjm sltu $at,$c_2,$t_1 1956ec07fdf1Sdjm $ADDU $c_3,$t_2,$at 1957ec07fdf1Sdjm $ST $c_2,$BNSZ($a0) 1958e611d49fSbcook___ 1959e611d49fSbcook &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, 1960e611d49fSbcook $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); 1961e611d49fSbcook$code.=<<___; 1962ec07fdf1Sdjm mflo $t_1 1963ec07fdf1Sdjm mfhi $t_2 1964ec07fdf1Sdjm $ADDU $c_3,$t_1 1965ec07fdf1Sdjm sltu $at,$c_3,$t_1 1966ec07fdf1Sdjm $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); 1967ec07fdf1Sdjm $ADDU $t_2,$at 1968ec07fdf1Sdjm $ADDU $c_1,$t_2 1969ec07fdf1Sdjm sltu $at,$c_1,$t_2 1970ec07fdf1Sdjm $ADDU $c_2,$at 1971ec07fdf1Sdjm $ST $c_3,2*$BNSZ($a0) 1972e611d49fSbcook___ 1973e611d49fSbcook &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, 1974e611d49fSbcook $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); 1975e611d49fSbcook &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, 1976e611d49fSbcook $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1); 1977e611d49fSbcook$code.=<<___; 
1978ec07fdf1Sdjm $ST $c_1,3*$BNSZ($a0) 1979e611d49fSbcook___ 1980e611d49fSbcook &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, 1981e611d49fSbcook $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); 1982e611d49fSbcook &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, 1983e611d49fSbcook $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); 1984e611d49fSbcook$code.=<<___; 1985ec07fdf1Sdjm mflo $t_1 1986ec07fdf1Sdjm mfhi $t_2 1987ec07fdf1Sdjm $ADDU $c_2,$t_1 1988ec07fdf1Sdjm sltu $at,$c_2,$t_1 1989ec07fdf1Sdjm $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2); 1990ec07fdf1Sdjm $ADDU $t_2,$at 1991ec07fdf1Sdjm $ADDU $c_3,$t_2 1992ec07fdf1Sdjm sltu $at,$c_3,$t_2 1993ec07fdf1Sdjm $ADDU $c_1,$at 1994ec07fdf1Sdjm $ST $c_2,4*$BNSZ($a0) 1995e611d49fSbcook___ 1996e611d49fSbcook &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, 1997e611d49fSbcook $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); 1998e611d49fSbcook &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, 1999e611d49fSbcook $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2); 2000e611d49fSbcook &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, 2001e611d49fSbcook $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3); 2002e611d49fSbcook$code.=<<___; 2003ec07fdf1Sdjm $ST $c_3,5*$BNSZ($a0) 2004e611d49fSbcook___ 2005e611d49fSbcook &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, 2006e611d49fSbcook $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3); 2007e611d49fSbcook &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, 2008e611d49fSbcook $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3); 2009e611d49fSbcook &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, 2010e611d49fSbcook $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); 2011e611d49fSbcook$code.=<<___; 2012ec07fdf1Sdjm mflo $t_1 2013ec07fdf1Sdjm mfhi $t_2 2014ec07fdf1Sdjm $ADDU $c_1,$t_1 2015ec07fdf1Sdjm sltu $at,$c_1,$t_1 2016ec07fdf1Sdjm $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1); 2017ec07fdf1Sdjm $ADDU $t_2,$at 2018ec07fdf1Sdjm $ADDU $c_2,$t_2 2019ec07fdf1Sdjm sltu $at,$c_2,$t_2 2020ec07fdf1Sdjm $ADDU $c_3,$at 2021ec07fdf1Sdjm $ST $c_1,6*$BNSZ($a0) 2022e611d49fSbcook___ 2023e611d49fSbcook 
&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, 2024e611d49fSbcook $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); 2025e611d49fSbcook &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, 2026e611d49fSbcook $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1); 2027e611d49fSbcook &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, 2028e611d49fSbcook $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1); 2029e611d49fSbcook &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, 2030e611d49fSbcook $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2); 2031e611d49fSbcook$code.=<<___; 2032ec07fdf1Sdjm $ST $c_2,7*$BNSZ($a0) 2033e611d49fSbcook___ 2034e611d49fSbcook &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, 2035e611d49fSbcook $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2); 2036e611d49fSbcook &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, 2037e611d49fSbcook $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2); 2038e611d49fSbcook &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, 2039e611d49fSbcook $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); 2040e611d49fSbcook$code.=<<___; 2041ec07fdf1Sdjm mflo $t_1 2042ec07fdf1Sdjm mfhi $t_2 2043ec07fdf1Sdjm $ADDU $c_3,$t_1 2044ec07fdf1Sdjm sltu $at,$c_3,$t_1 2045ec07fdf1Sdjm $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3); 2046ec07fdf1Sdjm $ADDU $t_2,$at 2047ec07fdf1Sdjm $ADDU $c_1,$t_2 2048ec07fdf1Sdjm sltu $at,$c_1,$t_2 2049ec07fdf1Sdjm $ADDU $c_2,$at 2050ec07fdf1Sdjm $ST $c_3,8*$BNSZ($a0) 2051e611d49fSbcook___ 2052e611d49fSbcook &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, 2053e611d49fSbcook $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); 2054e611d49fSbcook &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, 2055e611d49fSbcook $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3); 2056e611d49fSbcook &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, 2057e611d49fSbcook $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1); 2058e611d49fSbcook$code.=<<___; 2059ec07fdf1Sdjm $ST $c_1,9*$BNSZ($a0) 2060e611d49fSbcook___ 2061e611d49fSbcook &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, 2062e611d49fSbcook $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1); 2063e611d49fSbcook &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, 2064e611d49fSbcook $a_5,$a_5); # 
mul_add_c(a[5],b[5],c2,c3,c1); 2065e611d49fSbcook$code.=<<___; 2066ec07fdf1Sdjm mflo $t_1 2067ec07fdf1Sdjm mfhi $t_2 2068ec07fdf1Sdjm $ADDU $c_2,$t_1 2069ec07fdf1Sdjm sltu $at,$c_2,$t_1 2070ec07fdf1Sdjm $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2); 2071ec07fdf1Sdjm $ADDU $t_2,$at 2072ec07fdf1Sdjm $ADDU $c_3,$t_2 2073ec07fdf1Sdjm sltu $at,$c_3,$t_2 2074ec07fdf1Sdjm $ADDU $c_1,$at 2075ec07fdf1Sdjm $ST $c_2,10*$BNSZ($a0) 2076e611d49fSbcook___ 2077e611d49fSbcook &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, 2078e611d49fSbcook $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); 2079e611d49fSbcook &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, 2080e611d49fSbcook $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3); 2081e611d49fSbcook$code.=<<___; 2082ec07fdf1Sdjm $ST $c_3,11*$BNSZ($a0) 2083e611d49fSbcook___ 2084e611d49fSbcook &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, 2085e611d49fSbcook $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); 2086e611d49fSbcook$code.=<<___; 2087ec07fdf1Sdjm mflo $t_1 2088ec07fdf1Sdjm mfhi $t_2 2089ec07fdf1Sdjm $ADDU $c_1,$t_1 2090ec07fdf1Sdjm sltu $at,$c_1,$t_1 2091ec07fdf1Sdjm $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1); 2092ec07fdf1Sdjm $ADDU $t_2,$at 2093ec07fdf1Sdjm $ADDU $c_2,$t_2 2094ec07fdf1Sdjm sltu $at,$c_2,$t_2 2095ec07fdf1Sdjm $ADDU $c_3,$at 2096ec07fdf1Sdjm $ST $c_1,12*$BNSZ($a0) 2097e611d49fSbcook___ 2098e611d49fSbcook &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, 2099e611d49fSbcook $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); 2100e611d49fSbcook$code.=<<___; 2101ec07fdf1Sdjm $ST $c_2,13*$BNSZ($a0) 2102ec07fdf1Sdjm 2103ec07fdf1Sdjm mflo $t_1 2104ec07fdf1Sdjm mfhi $t_2 2105ec07fdf1Sdjm $ADDU $c_3,$t_1 2106ec07fdf1Sdjm sltu $at,$c_3,$t_1 2107ec07fdf1Sdjm $ADDU $t_2,$at 2108ec07fdf1Sdjm $ADDU $c_1,$t_2 2109ec07fdf1Sdjm $ST $c_3,14*$BNSZ($a0) 2110ec07fdf1Sdjm $ST $c_1,15*$BNSZ($a0) 2111ec07fdf1Sdjm 2112ec07fdf1Sdjm .set noreorder 2113ec07fdf1Sdjm___ 2114ec07fdf1Sdjm$code.=<<___ if ($flavour =~ /nubi/i); 2115ec07fdf1Sdjm $REG_L $t3,4*$SZREG($sp) 2116ec07fdf1Sdjm $REG_L 
$t2,3*$SZREG($sp) 2117ec07fdf1Sdjm $REG_L $t1,2*$SZREG($sp) 2118ec07fdf1Sdjm $REG_L $t0,1*$SZREG($sp) 2119ec07fdf1Sdjm $REG_L $gp,0*$SZREG($sp) 2120ec07fdf1Sdjm $PTR_ADD $sp,6*$SZREG 2121ec07fdf1Sdjm___ 2122ec07fdf1Sdjm$code.=<<___; 2123ec07fdf1Sdjm jr $ra 2124ec07fdf1Sdjm nop 2125ec07fdf1Sdjm.end bn_sqr_comba8 2126ec07fdf1Sdjm 2127ec07fdf1Sdjm.align 5 2128ec07fdf1Sdjm.globl bn_sqr_comba4 2129ec07fdf1Sdjm.ent bn_sqr_comba4 2130ec07fdf1Sdjmbn_sqr_comba4: 2131ec07fdf1Sdjm___ 2132ec07fdf1Sdjm$code.=<<___ if ($flavour =~ /nubi/i); 2133ec07fdf1Sdjm .frame $sp,6*$SZREG,$ra 2134ec07fdf1Sdjm .mask 0x8000f008,-$SZREG 2135ec07fdf1Sdjm .set noreorder 2136ec07fdf1Sdjm $PTR_SUB $sp,6*$SZREG 2137ec07fdf1Sdjm $REG_S $ra,5*$SZREG($sp) 2138ec07fdf1Sdjm $REG_S $t3,4*$SZREG($sp) 2139ec07fdf1Sdjm $REG_S $t2,3*$SZREG($sp) 2140ec07fdf1Sdjm $REG_S $t1,2*$SZREG($sp) 2141ec07fdf1Sdjm $REG_S $t0,1*$SZREG($sp) 2142ec07fdf1Sdjm $REG_S $gp,0*$SZREG($sp) 2143ec07fdf1Sdjm___ 2144ec07fdf1Sdjm$code.=<<___; 2145ec07fdf1Sdjm .set reorder 2146ec07fdf1Sdjm $LD $a_0,0($a1) 2147ec07fdf1Sdjm $LD $a_1,$BNSZ($a1) 2148ec07fdf1Sdjm $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); 2149ec07fdf1Sdjm $LD $a_2,2*$BNSZ($a1) 2150ec07fdf1Sdjm $LD $a_3,3*$BNSZ($a1) 2151ec07fdf1Sdjm mflo $c_1 2152ec07fdf1Sdjm mfhi $c_2 2153ec07fdf1Sdjm $ST $c_1,0($a0) 2154ec07fdf1Sdjm 2155ec07fdf1Sdjm $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); 2156ec07fdf1Sdjm mflo $t_1 2157ec07fdf1Sdjm mfhi $t_2 2158ec07fdf1Sdjm slt $c_1,$t_2,$zero 2159ec07fdf1Sdjm $SLL $t_2,1 2160ec07fdf1Sdjm $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); 2161ec07fdf1Sdjm slt $a2,$t_1,$zero 2162ec07fdf1Sdjm $ADDU $t_2,$a2 2163ec07fdf1Sdjm $SLL $t_1,1 2164ec07fdf1Sdjm $ADDU $c_2,$t_1 2165ec07fdf1Sdjm sltu $at,$c_2,$t_1 2166ec07fdf1Sdjm $ADDU $c_3,$t_2,$at 2167ec07fdf1Sdjm $ST $c_2,$BNSZ($a0) 2168e611d49fSbcook___ 2169e611d49fSbcook &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, 2170e611d49fSbcook $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); 
2171e611d49fSbcook$code.=<<___; 2172ec07fdf1Sdjm mflo $t_1 2173ec07fdf1Sdjm mfhi $t_2 2174ec07fdf1Sdjm $ADDU $c_3,$t_1 2175ec07fdf1Sdjm sltu $at,$c_3,$t_1 2176ec07fdf1Sdjm $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); 2177ec07fdf1Sdjm $ADDU $t_2,$at 2178ec07fdf1Sdjm $ADDU $c_1,$t_2 2179ec07fdf1Sdjm sltu $at,$c_1,$t_2 2180ec07fdf1Sdjm $ADDU $c_2,$at 2181ec07fdf1Sdjm $ST $c_3,2*$BNSZ($a0) 2182e611d49fSbcook___ 2183e611d49fSbcook &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, 2184e611d49fSbcook $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); 2185e611d49fSbcook &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, 2186e611d49fSbcook $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); 2187e611d49fSbcook$code.=<<___; 2188ec07fdf1Sdjm $ST $c_1,3*$BNSZ($a0) 2189e611d49fSbcook___ 2190e611d49fSbcook &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, 2191e611d49fSbcook $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); 2192e611d49fSbcook$code.=<<___; 2193ec07fdf1Sdjm mflo $t_1 2194ec07fdf1Sdjm mfhi $t_2 2195ec07fdf1Sdjm $ADDU $c_2,$t_1 2196ec07fdf1Sdjm sltu $at,$c_2,$t_1 2197ec07fdf1Sdjm $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); 2198ec07fdf1Sdjm $ADDU $t_2,$at 2199ec07fdf1Sdjm $ADDU $c_3,$t_2 2200ec07fdf1Sdjm sltu $at,$c_3,$t_2 2201ec07fdf1Sdjm $ADDU $c_1,$at 2202ec07fdf1Sdjm $ST $c_2,4*$BNSZ($a0) 2203e611d49fSbcook___ 2204e611d49fSbcook &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, 2205e611d49fSbcook $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); 2206e611d49fSbcook$code.=<<___; 2207ec07fdf1Sdjm $ST $c_3,5*$BNSZ($a0) 2208ec07fdf1Sdjm 2209ec07fdf1Sdjm mflo $t_1 2210ec07fdf1Sdjm mfhi $t_2 2211ec07fdf1Sdjm $ADDU $c_1,$t_1 2212ec07fdf1Sdjm sltu $at,$c_1,$t_1 2213ec07fdf1Sdjm $ADDU $t_2,$at 2214ec07fdf1Sdjm $ADDU $c_2,$t_2 2215ec07fdf1Sdjm $ST $c_1,6*$BNSZ($a0) 2216ec07fdf1Sdjm $ST $c_2,7*$BNSZ($a0) 2217ec07fdf1Sdjm 2218ec07fdf1Sdjm .set noreorder 2219ec07fdf1Sdjm___ 2220ec07fdf1Sdjm$code.=<<___ if ($flavour =~ /nubi/i); 2221ec07fdf1Sdjm $REG_L $t3,4*$SZREG($sp) 2222ec07fdf1Sdjm $REG_L $t2,3*$SZREG($sp) 2223ec07fdf1Sdjm $REG_L 
$t1,2*$SZREG($sp) 2224ec07fdf1Sdjm $REG_L $t0,1*$SZREG($sp) 2225ec07fdf1Sdjm $REG_L $gp,0*$SZREG($sp) 2226ec07fdf1Sdjm $PTR_ADD $sp,6*$SZREG 2227ec07fdf1Sdjm___ 2228ec07fdf1Sdjm$code.=<<___; 2229ec07fdf1Sdjm jr $ra 2230ec07fdf1Sdjm nop 2231ec07fdf1Sdjm.end bn_sqr_comba4 2232ec07fdf1Sdjm___ 2233ec07fdf1Sdjmprint $code; 2234ec07fdf1Sdjmclose STDOUT; 2235