#include "mips_arch.h"

/*
 * Division wrappers.  On R6 the ddivu(rs,rt)/divu(rs,rt) statement expands
 * to nothing and mfqt/mfrm emit the three-operand divide/modulo instruction;
 * otherwise ddivu performs the divide and mfqt/mfrm read the quotient and
 * remainder back with mflo/mfhi.
 */
#if defined(_MIPS_ARCH_MIPS64R6)
# define ddivu(rs,rt)
# define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
# define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
#elif defined(_MIPS_ARCH_MIPS32R6)
# define divu(rs,rt)
# define mfqt(rd,rs,rt)	divu	rd,rs,rt
# define mfrm(rd,rs,rt)	modu	rd,rs,rt
#else
# define ddivu(rs,rt)	ddivu	$0,rs,rt
# define mfqt(rd,rs,rt)	mflo	rd
# define mfrm(rd,rs,rt)	mfhi	rd
#endif

/*
 * Word-level bignum helpers for 64-bit MIPS.  Registers are referred to by
 * number: the routines read their arguments from $4..$7, leave the result
 * in $2 and return through $31.  dmultu(), mflo() and mfhi() are used in
 * their parenthesised wrapper form but are not defined in this file
 * (presumably mips_arch.h supplies them -- confirm against that header).
 * Every return sequence also copies the result into $4 in the delay slot;
 * NOTE(review): the reason for that copy is not visible in this file.
 */
.rdata
.asciiz	"mips3.s, Version 1.2"
.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"

.text
.set	noat

/*
 * bn_mul_add_words
 *   $4 = words that are read, updated and stored back
 *   $5 = words that are multiplied
 *   $6 = word count
 *   $7 = multiplier
 * Returns the final carry word in $2 (0 when $6 <= 0).
 * Per word: (carry : [$4]) = [$5] * $7 + [$4] + carry.
 */
.align	5
.globl	bn_mul_add_words
.ent	bn_mul_add_words
bn_mul_add_words:
	.set	noreorder
	bgtz	$6,bn_mul_add_words_internal
	move	$2,$0		# delay slot: carry starts at 0 on both paths
	jr	$31
	move	$4,$2		# delay slot: result mirrored into $4
.end	bn_mul_add_words

/*
 * Worker for bn_mul_add_words; entered with $2 = 0 and $6 > 0.
 * The loop handles four words per pass for as long as ($6 & -4) is
 * positive; the tail finishes the remaining one to three words.
 * $1 serves as scratch (the file is assembled under .set noat).
 */
.align	5
.ent	bn_mul_add_words_internal
bn_mul_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_add_words_tail

.L_bn_mul_add_words_loop:
	ld	$12,0($5)
	dmultu	($12,$7)
	ld	$13,0($4)
	ld	$14,8($5)
	ld	$15,8($4)
	ld	$8,2*8($5)
	ld	$9,2*8($4)
	daddu	$13,$2
	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
				# values", but it seems to work fine
				# even on 64-bit registers.
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$13,$1
	daddu	$2,$12
	dmultu	($14,$7)
	sltu	$1,$13,$1
	sd	$13,0($4)
	daddu	$2,$1

	ld	$10,3*8($5)
	ld	$11,3*8($4)
	daddu	$15,$2
	sltu	$2,$15,$2
	mflo	($1,$14,$7)
	mfhi	($14,$14,$7)
	daddu	$15,$1
	daddu	$2,$14
	dmultu	($8,$7)
	sltu	$1,$15,$1
	sd	$15,8($4)
	daddu	$2,$1

	subu	$6,4
	daddu	$4,4*8		# pointers advance here, so the last two
	daddu	$5,4*8		# stores of the pass use negative offsets
	daddu	$9,$2
	sltu	$2,$9,$2
	mflo	($1,$8,$7)
	mfhi	($8,$8,$7)
	daddu	$9,$1
	daddu	$2,$8
	dmultu	($10,$7)
	sltu	$1,$9,$1
	sd	$9,-2*8($4)
	daddu	$2,$1


	and	$8,$6,$3
	daddu	$11,$2
	sltu	$2,$11,$2
	mflo	($1,$10,$7)
	mfhi	($10,$10,$7)
	daddu	$11,$1
	daddu	$2,$10
	sltu	$1,$11,$1
	sd	$11,-8($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_add_words_loop
	daddu	$2,$1		# delay slot: finish the carry

	beqz	$6,.L_bn_mul_add_words_return
	nop

.L_bn_mul_add_words_tail:
	.set	reorder
	ld	$12,0($5)
	dmultu	($12,$7)
	ld	$13,0($4)
	subu	$6,1
	daddu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$13,$1
	daddu	$2,$12
	sltu	$1,$13,$1
	sd	$13,0($4)
	daddu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	ld	$12,8($5)
	dmultu	($12,$7)
	ld	$13,8($4)
	subu	$6,1
	daddu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$13,$1
	daddu	$2,$12
	sltu	$1,$13,$1
	sd	$13,8($4)
	daddu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	ld	$12,2*8($5)
	dmultu	($12,$7)
	ld	$13,2*8($4)
	daddu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$13,$1
	daddu	$2,$12
	sltu	$1,$13,$1
	sd	$13,2*8($4)
	daddu	$2,$1

.L_bn_mul_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot: result mirrored into $4
.end	bn_mul_add_words_internal

/*
 * bn_mul_words
 *   $4 = words that are written
 *   $5 = words that are multiplied
 *   $6 = word count
 *   $7 = multiplier
 * Returns the final carry word in $2 (0 when $6 <= 0).
 * Per word: (carry : [$4]) = [$5] * $7 + carry.
 */
.align	5
.globl	bn_mul_words
.ent	bn_mul_words
bn_mul_words:
	.set	noreorder
	bgtz	$6,bn_mul_words_internal
	move	$2,$0		# delay slot: carry starts at 0 on both paths
	jr	$31
	move	$4,$2		# delay slot: result mirrored into $4
.end	bn_mul_words

/*
 * Worker for bn_mul_words; entered with $2 = 0 and $6 > 0.
 * Four words per loop pass while ($6 & -4) is positive, then a tail for
 * the remaining one to three words.
 */
.align	5
.ent	bn_mul_words_internal
bn_mul_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_words_tail

.L_bn_mul_words_loop:
	ld	$12,0($5)
	dmultu	($12,$7)
	ld	$14,8($5)
	ld	$8,2*8($5)
	ld	$10,3*8($5)
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$2,$1
	sltu	$13,$2,$1
	dmultu	($14,$7)
	sd	$2,0($4)
	daddu	$2,$13,$12

	subu	$6,4
	daddu	$4,4*8		# pointers advance here, so the remaining
	daddu	$5,4*8		# stores of the pass use negative offsets
	mflo	($1,$14,$7)
	mfhi	($14,$14,$7)
	daddu	$2,$1
	sltu	$15,$2,$1
	dmultu	($8,$7)
	sd	$2,-3*8($4)
	daddu	$2,$15,$14

	mflo	($1,$8,$7)
	mfhi	($8,$8,$7)
	daddu	$2,$1
	sltu	$9,$2,$1
	dmultu	($10,$7)
	sd	$2,-2*8($4)
	daddu	$2,$9,$8

	and	$8,$6,$3
	mflo	($1,$10,$7)
	mfhi	($10,$10,$7)
	daddu	$2,$1
	sltu	$11,$2,$1
	sd	$2,-8($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_words_loop
	daddu	$2,$11,$10	# delay slot: next carry = carry-out + high word

	beqz	$6,.L_bn_mul_words_return
	nop

.L_bn_mul_words_tail:
	.set	reorder
	ld	$12,0($5)
	dmultu	($12,$7)
	subu	$6,1
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$2,$1
	sltu	$13,$2,$1
	sd	$2,0($4)
	daddu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	ld	$12,8($5)
	dmultu	($12,$7)
	subu	$6,1
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$2,$1
	sltu	$13,$2,$1
	sd	$2,8($4)
	daddu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	ld	$12,2*8($5)
	dmultu	($12,$7)
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$2,$1
	sltu	$13,$2,$1
	sd	$2,2*8($4)
	daddu	$2,$13,$12

.L_bn_mul_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot: result mirrored into $4
.end	bn_mul_words_internal

/*
 * bn_sqr_words
 *   $4 = output words (two are written per input word: low, then high)
 *   $5 = input words
 *   $6 = input word count
 * $2 is cleared on entry and never modified afterwards, so 0 is returned.
 */
.align	5
.globl	bn_sqr_words
.ent	bn_sqr_words
bn_sqr_words:
	.set	noreorder
	bgtz	$6,bn_sqr_words_internal
	move	$2,$0		# delay slot: $2 = 0 on both paths
	jr	$31
	move	$4,$2		# delay slot: result mirrored into $4
.end	bn_sqr_words

/*
 * Worker for bn_sqr_words; entered with $6 > 0.
 * Four input words (eight output words) per loop pass while ($6 & -4) is
 * positive, then a tail for the remaining one to three input words.
 */
.align	5
.ent	bn_sqr_words_internal
bn_sqr_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_sqr_words_tail

.L_bn_sqr_words_loop:
	ld	$12,0($5)
	dmultu	($12,$12)
	ld	$14,8($5)
	ld	$8,2*8($5)
	ld	$10,3*8($5)
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sd	$13,0($4)
	sd	$12,8($4)

	dmultu	($14,$14)
	subu	$6,4
	daddu	$4,8*8		# output advances twice as fast as input
	daddu	$5,4*8
	mflo	($15,$14,$14)
	mfhi	($14,$14,$14)
	sd	$15,-6*8($4)
	sd	$14,-5*8($4)

	dmultu	($8,$8)
	mflo	($9,$8,$8)
	mfhi	($8,$8,$8)
	sd	$9,-4*8($4)
	sd	$8,-3*8($4)


	dmultu	($10,$10)
	and	$8,$6,$3
	mflo	($11,$10,$10)
	mfhi	($10,$10,$10)
	sd	$11,-2*8($4)

	.set	noreorder
	bgtz	$8,.L_bn_sqr_words_loop
	sd	$10,-8($4)	# delay slot: last store of the pass

	beqz	$6,.L_bn_sqr_words_return
	nop

.L_bn_sqr_words_tail:
	.set	reorder
	ld	$12,0($5)
	dmultu	($12,$12)
	subu	$6,1
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sd	$13,0($4)
	sd	$12,8($4)
	beqz	$6,.L_bn_sqr_words_return

	ld	$12,8($5)
	dmultu	($12,$12)
	subu	$6,1
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sd	$13,2*8($4)
	sd	$12,3*8($4)
	beqz	$6,.L_bn_sqr_words_return

	ld	$12,2*8($5)
	dmultu	($12,$12)
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sd	$13,4*8($4)
	sd	$12,5*8($4)

.L_bn_sqr_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot: result mirrored into $4

.end	bn_sqr_words_internal

/*
 * bn_add_words
 *   $4 = words that are written
 *   $5 = first operand words
 *   $6 = second operand words
 *   $7 = word count
 * Returns the final carry in $2 (0 when $7 <= 0).
 * Per word: [$4] = [$5] + [$6] + carry.
 */
.align	5
.globl	bn_add_words
.ent	bn_add_words
bn_add_words:
	.set	noreorder
	bgtz	$7,bn_add_words_internal
	move	$2,$0		# delay slot: carry starts at 0 on both paths
	jr	$31
	move	$4,$2		# delay slot: result mirrored into $4
.end	bn_add_words

/*
 * Worker for bn_add_words; entered with $2 = 0 and $7 > 0.
 * Four words per loop pass while ($7 & -4) is positive, then a tail for
 * the remaining one to three words.  Each word takes two additions, and
 * the carry-outs of both ($24/$25 and $2) are summed into the next carry.
 */
.align	5
.ent	bn_add_words_internal
bn_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_add_words_tail

.L_bn_add_words_loop:
	ld	$12,0($5)
	ld	$8,0($6)
	subu	$7,4
	ld	$13,8($5)
	and	$1,$7,$3
	ld	$14,2*8($5)
	daddu	$6,4*8
	ld	$15,3*8($5)
	daddu	$4,4*8
	ld	$9,-3*8($6)
	daddu	$5,4*8
	ld	$10,-2*8($6)
	ld	$11,-8($6)
	daddu	$8,$12
	sltu	$24,$8,$12
	daddu	$12,$8,$2
	sltu	$2,$12,$8
	sd	$12,-4*8($4)
	daddu	$2,$24

	daddu	$9,$13
	sltu	$25,$9,$13
	daddu	$13,$9,$2
	sltu	$2,$13,$9
	sd	$13,-3*8($4)
	daddu	$2,$25

	daddu	$10,$14
	sltu	$24,$10,$14
	daddu	$14,$10,$2
	sltu	$2,$14,$10
	sd	$14,-2*8($4)
	daddu	$2,$24

	daddu	$11,$15
	sltu	$25,$11,$15
	daddu	$15,$11,$2
	sltu	$2,$15,$11
	sd	$15,-8($4)

	.set	noreorder
	bgtz	$1,.L_bn_add_words_loop
	daddu	$2,$25		# delay slot: finish the carry

	beqz	$7,.L_bn_add_words_return
	nop

.L_bn_add_words_tail:
	.set	reorder
	ld	$12,0($5)
	ld	$8,0($6)
	daddu	$8,$12
	subu	$7,1
	sltu	$24,$8,$12
	daddu	$12,$8,$2
	sltu	$2,$12,$8
	sd	$12,0($4)
	daddu	$2,$24
	beqz	$7,.L_bn_add_words_return

	ld	$13,8($5)
	ld	$9,8($6)
	daddu	$9,$13
	subu	$7,1
	sltu	$25,$9,$13
	daddu	$13,$9,$2
	sltu	$2,$13,$9
	sd	$13,8($4)
	daddu	$2,$25
	beqz	$7,.L_bn_add_words_return

	ld	$14,2*8($5)
	ld	$10,2*8($6)
	daddu	$10,$14
	sltu	$24,$10,$14
	daddu	$14,$10,$2
	sltu	$2,$14,$10
	sd	$14,2*8($4)
	daddu	$2,$24

.L_bn_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot: result mirrored into $4

.end	bn_add_words_internal

/*
 * bn_sub_words
 *   $4 = words that are written
 *   $5 = minuend words
 *   $6 = subtrahend words
 *   $7 = word count
 * Returns the final borrow in $2 (0 when $7 <= 0).
 * Per word: [$4] = [$5] - [$6] - borrow.
 */
.align	5
.globl	bn_sub_words
.ent	bn_sub_words
bn_sub_words:
	.set	noreorder
	bgtz	$7,bn_sub_words_internal
	move	$2,$0		# delay slot: borrow starts at 0 on both paths
	jr	$31
	move	$4,$0		# delay slot: $4 cleared directly ($2 is 0 here too)
.end	bn_sub_words

/*
 * Worker for bn_sub_words; entered with $2 = 0 and $7 > 0.
 * Four words per loop pass while ($7 & -4) is positive, then a tail for
 * the remaining one to three words.  Each word takes two subtractions, and
 * the borrows of both ($24/$25 and $2) are summed into the next borrow.
 */
.align	5
.ent	bn_sub_words_internal
bn_sub_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_sub_words_tail

.L_bn_sub_words_loop:
	ld	$12,0($5)
	ld	$8,0($6)
	subu	$7,4
	ld	$13,8($5)
	and	$1,$7,$3
	ld	$14,2*8($5)
	daddu	$6,4*8
	ld	$15,3*8($5)
	daddu	$4,4*8
	ld	$9,-3*8($6)
	daddu	$5,4*8
	ld	$10,-2*8($6)
	ld	$11,-8($6)
	sltu	$24,$12,$8
	dsubu	$8,$12,$8
	dsubu	$12,$8,$2
	sgtu	$2,$12,$8
	sd	$12,-4*8($4)
	daddu	$2,$24

	sltu	$25,$13,$9
	dsubu	$9,$13,$9
	dsubu	$13,$9,$2
	sgtu	$2,$13,$9
	sd	$13,-3*8($4)
	daddu	$2,$25


	sltu	$24,$14,$10
	dsubu	$10,$14,$10
	dsubu	$14,$10,$2
	sgtu	$2,$14,$10
	sd	$14,-2*8($4)
	daddu	$2,$24

	sltu	$25,$15,$11
	dsubu	$11,$15,$11
	dsubu	$15,$11,$2
	sgtu	$2,$15,$11
	sd	$15,-8($4)

	.set	noreorder
	bgtz	$1,.L_bn_sub_words_loop
	daddu	$2,$25		# delay slot: finish the borrow

	beqz	$7,.L_bn_sub_words_return
	nop

.L_bn_sub_words_tail:
	.set	reorder
	ld	$12,0($5)
	ld	$8,0($6)
	subu	$7,1
	sltu	$24,$12,$8
	dsubu	$8,$12,$8
	dsubu	$12,$8,$2
	sgtu	$2,$12,$8
	sd	$12,0($4)
	daddu	$2,$24
	beqz	$7,.L_bn_sub_words_return

	ld	$13,8($5)
	subu	$7,1
	ld	$9,8($6)
	sltu	$25,$13,$9
	dsubu	$9,$13,$9
	dsubu	$13,$9,$2
	sgtu	$2,$13,$9
	sd	$13,8($4)
	daddu	$2,$25
	beqz	$7,.L_bn_sub_words_return

	ld	$14,2*8($5)
	ld	$10,2*8($6)
	sltu	$24,$14,$10
	dsubu	$10,$14,$10
	dsubu	$14,$10,$2
	sgtu	$2,$14,$10
	sd	$14,2*8($4)
	daddu	$2,$24

.L_bn_sub_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot: result mirrored into $4
.end	bn_sub_words_internal

#if 0
/*
 * The bn_div_3_words entry point is re-used for constant-time interface.
 * Implementation is retained as historical reference.
 */
.align	5
.globl	bn_div_3_words
.ent	bn_div_3_words
bn_div_3_words:
	.set	noreorder
	move	$7,$4		# we know that bn_div_words does not
				# touch $7, $10, $11 and preserves $6
				# so that we can save two arguments
				# and return address in registers
				# instead of stack:-)

	ld	$4,($7)
	move	$10,$5
	bne	$4,$6,bn_div_3_words_internal
	ld	$5,-8($7)
	li	$2,-1
	jr	$31
	move	$4,$2
.end	bn_div_3_words

.align	5
.ent	bn_div_3_words_internal
bn_div_3_words_internal:
	.set	reorder
	move	$11,$31
	bal	bn_div_words_internal
	move	$31,$11
	dmultu	($10,$2)
	ld	$14,-2*8($7)
	move	$8,$0
	mfhi	($13,$10,$2)
	mflo	($12,$10,$2)
	sltu	$24,$13,$5
.L_bn_div_3_words_inner_loop:
	bnez	$24,.L_bn_div_3_words_inner_loop_done
	sgeu	$1,$14,$12
	seq	$25,$13,$5
	and	$1,$25
	sltu	$15,$12,$10
	daddu	$5,$6
	dsubu	$13,$15
	dsubu	$12,$10
	sltu	$24,$13,$5
	sltu	$8,$5,$6
	or	$24,$8
	.set	noreorder
	beqz	$1,.L_bn_div_3_words_inner_loop
	dsubu	$2,1
	daddu	$2,1
	.set	reorder
.L_bn_div_3_words_inner_loop_done:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_div_3_words_internal
#endif

/*
 * bn_div_words
 *   $4 = high word of the dividend
 *   $5 = low word of the dividend
 *   $6 = divisor
 * Returns -1 in $2 when the divisor is zero; otherwise the work is done
 * by bn_div_words_internal.
 */
.align	5
.globl	bn_div_words
.ent	bn_div_words
bn_div_words:
	.set	noreorder
	bnez	$6,bn_div_words_internal
	li	$2,-1		# I would rather signal div-by-zero
				# which can be done with 'break 7'
	jr	$31
	move	$4,$2
.end	bn_div_words

/*
 * Worker for bn_div_words (also the target of the disabled
 * bn_div_3_words code above).
 *   1. Shift the divisor left until its top bit is set; $25 counts the
 *      shifts.  The dividend is shifted by the same amount, and break 6
 *      is raised if non-zero bits of $4 would be shifted out.
 *   2. Produce the quotient as two 32-bit digits.  Each digit is
 *      estimated by dividing by the upper half of the divisor (or taken
 *      as 0xffffffff when the upper halves are equal) and then corrected
 *      downwards in an inner loop.
 * Out: $2 = quotient, $3 and $5 = remainder, $6 = divisor as on entry.
 * Assembled in noreorder mode up to the first .set reorder (the mode is
 * inherited from bn_div_words), so the opening branches carry explicit
 * delay slots.
 */
.align	5
.ent	bn_div_words_internal
bn_div_words_internal:
	move	$3,$0
	bltz	$6,.L_bn_div_words_body
	move	$25,$3		# delay slot: shift count = 0
	dsll	$6,1
	bgtz	$6,.-4		# back to the dsll until the top bit is set
	addu	$25,1		# delay slot: count the shift

	.set	reorder
	negu	$13,$25
	li	$14,-1
	dsll	$14,$13
	and	$14,$4
	dsrl	$1,$5,$13
	.set	noreorder
	beqz	$14,.+12	# skip the break when no bits are lost
	nop
	break	6		# signal overflow
	.set	reorder
	dsll	$4,$25
	dsll	$5,$25
	or	$4,$1
.L_bn_div_words_body:
	dsrl	$3,$6,4*8	# bits
	sgeu	$1,$4,$6
	.set	noreorder
	beqz	$1,.+12		# skip the dsubu when $4 < $6
	nop
	dsubu	$4,$6
	.set	reorder

	li	$8,-1
	dsrl	$9,$4,4*8	# bits
	dsrl	$8,4*8	# q=0xffffffff
	beq	$3,$9,.L_bn_div_words_skip_div1
	ddivu	($4,$3)
	mfqt	($8,$4,$3)
.L_bn_div_words_skip_div1:
	dmultu	($6,$8)
	dsll	$15,$4,4*8	# bits
	dsrl	$1,$5,4*8	# bits
	or	$15,$1
	mflo	($12,$6,$8)
	mfhi	($13,$6,$8)
.L_bn_div_words_inner_loop1:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$2,$12,$6
	or	$1,$14
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop1_done
	dsubu	$13,$2
	dsubu	$12,$6
	b	.L_bn_div_words_inner_loop1
	dsubu	$8,1
	.set	reorder
.L_bn_div_words_inner_loop1_done:

	dsll	$5,4*8	# bits
	dsubu	$4,$15,$12
	dsll	$2,$8,4*8	# bits

	li	$8,-1
	dsrl	$9,$4,4*8	# bits
	dsrl	$8,4*8	# q=0xffffffff
	beq	$3,$9,.L_bn_div_words_skip_div2
	ddivu	($4,$3)
	mfqt	($8,$4,$3)
.L_bn_div_words_skip_div2:
	dmultu	($6,$8)
	dsll	$15,$4,4*8	# bits
	dsrl	$1,$5,4*8	# bits
	or	$15,$1
	mflo	($12,$6,$8)
	mfhi	($13,$6,$8)
.L_bn_div_words_inner_loop2:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$3,$12,$6
	or	$1,$14
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop2_done
	dsubu	$13,$3
	dsubu	$12,$6
	b	.L_bn_div_words_inner_loop2
	dsubu	$8,1
	.set	reorder
.L_bn_div_words_inner_loop2_done:

	dsubu	$4,$15,$12
	or	$2,$8
	dsrl	$3,$4,$25	# $3 contains remainder if anybody wants it
	dsrl	$6,$25		# restore $6

	.set	noreorder
	move	$5,$3
	jr	$31
	move	$4,$2
.end	bn_div_words_internal

.align	5
.globl	bn_mul_comba8
.ent	bn_mul_comba8
bn_mul_comba8:
	.set	noreorder
	.frame	$29,6*8,$31
	.mask	0x003f0000,-8
	dsubu	$29,6*8
	sd	$21,5*8($29)
	sd	$20,4*8($29)
	sd	$19,3*8($29)
	sd	$18,2*8($29)
	sd	$17,1*8($29)
	sd	$16,0*8($29)

	.set	reorder
	ld	$12,0($5)	# If compiled with -mips3 option on
				# R5000 box assembler barks on this
				# line with "should not have mult/div
				# as last instruction in bb (R10K
				# bug)" warning.
If anybody out there 7716410c867Schristos # has a clue about how to circumvent 7726410c867Schristos # this do send me a note. 7736410c867Schristos # <appro@fy.chalmers.se> 7746410c867Schristos 7756410c867Schristos ld $8,0($6) 7766410c867Schristos ld $13,8($5) 7776410c867Schristos ld $14,2*8($5) 778e0ea3921Schristos dmultu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3); 7796410c867Schristos ld $15,3*8($5) 7806410c867Schristos ld $9,8($6) 7816410c867Schristos ld $10,2*8($6) 7826410c867Schristos ld $11,3*8($6) 783e0ea3921Schristos mflo ($2,$12,$8) 784e0ea3921Schristos mfhi ($3,$12,$8) 7856410c867Schristos 7866410c867Schristos ld $16,4*8($5) 7876410c867Schristos ld $18,5*8($5) 788e0ea3921Schristos dmultu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1); 7896410c867Schristos ld $20,6*8($5) 7906410c867Schristos ld $5,7*8($5) 7916410c867Schristos ld $17,4*8($6) 7926410c867Schristos ld $19,5*8($6) 793e0ea3921Schristos mflo ($24,$12,$9) 794e0ea3921Schristos mfhi ($25,$12,$9) 7956410c867Schristos daddu $3,$24 7966410c867Schristos sltu $1,$3,$24 797e0ea3921Schristos dmultu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1); 7986410c867Schristos daddu $7,$25,$1 7996410c867Schristos ld $21,6*8($6) 8006410c867Schristos ld $6,7*8($6) 8016410c867Schristos sd $2,0($4) # r[0]=c1; 802e0ea3921Schristos mflo ($24,$13,$8) 803e0ea3921Schristos mfhi ($25,$13,$8) 8046410c867Schristos daddu $3,$24 8056410c867Schristos sltu $1,$3,$24 806e0ea3921Schristos dmultu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2); 8076410c867Schristos daddu $25,$1 8086410c867Schristos daddu $7,$25 8096410c867Schristos sltu $2,$7,$25 8106410c867Schristos sd $3,8($4) # r[1]=c2; 8116410c867Schristos 812e0ea3921Schristos mflo ($24,$14,$8) 813e0ea3921Schristos mfhi ($25,$14,$8) 8146410c867Schristos daddu $7,$24 8156410c867Schristos sltu $1,$7,$24 816e0ea3921Schristos dmultu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2); 8176410c867Schristos daddu $25,$1 8186410c867Schristos daddu $2,$25 819e0ea3921Schristos mflo ($24,$13,$9) 820e0ea3921Schristos mfhi 
($25,$13,$9) 8216410c867Schristos daddu $7,$24 8226410c867Schristos sltu $1,$7,$24 823e0ea3921Schristos dmultu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2); 8246410c867Schristos daddu $25,$1 8256410c867Schristos daddu $2,$25 8266410c867Schristos sltu $3,$2,$25 827e0ea3921Schristos mflo ($24,$12,$10) 828e0ea3921Schristos mfhi ($25,$12,$10) 8296410c867Schristos daddu $7,$24 8306410c867Schristos sltu $1,$7,$24 831e0ea3921Schristos dmultu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3); 8326410c867Schristos daddu $25,$1 8336410c867Schristos daddu $2,$25 8346410c867Schristos sltu $1,$2,$25 8356410c867Schristos daddu $3,$1 8366410c867Schristos sd $7,2*8($4) # r[2]=c3; 8376410c867Schristos 838e0ea3921Schristos mflo ($24,$12,$11) 839e0ea3921Schristos mfhi ($25,$12,$11) 8406410c867Schristos daddu $2,$24 8416410c867Schristos sltu $1,$2,$24 842e0ea3921Schristos dmultu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3); 8436410c867Schristos daddu $25,$1 8446410c867Schristos daddu $3,$25 8456410c867Schristos sltu $7,$3,$25 846e0ea3921Schristos mflo ($24,$13,$10) 847e0ea3921Schristos mfhi ($25,$13,$10) 8486410c867Schristos daddu $2,$24 8496410c867Schristos sltu $1,$2,$24 850e0ea3921Schristos dmultu ($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3); 8516410c867Schristos daddu $25,$1 8526410c867Schristos daddu $3,$25 8536410c867Schristos sltu $1,$3,$25 8546410c867Schristos daddu $7,$1 855e0ea3921Schristos mflo ($24,$14,$9) 856e0ea3921Schristos mfhi ($25,$14,$9) 8576410c867Schristos daddu $2,$24 8586410c867Schristos sltu $1,$2,$24 859e0ea3921Schristos dmultu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3); 8606410c867Schristos daddu $25,$1 8616410c867Schristos daddu $3,$25 8626410c867Schristos sltu $1,$3,$25 8636410c867Schristos daddu $7,$1 864e0ea3921Schristos mflo ($24,$15,$8) 865e0ea3921Schristos mfhi ($25,$15,$8) 8666410c867Schristos daddu $2,$24 8676410c867Schristos sltu $1,$2,$24 868e0ea3921Schristos dmultu ($16,$8) # mul_add_c(a[4],b[0],c2,c3,c1); 8696410c867Schristos daddu $25,$1 8706410c867Schristos daddu 
$3,$25 8716410c867Schristos sltu $1,$3,$25 8726410c867Schristos daddu $7,$1 8736410c867Schristos sd $2,3*8($4) # r[3]=c1; 8746410c867Schristos 875e0ea3921Schristos mflo ($24,$16,$8) 876e0ea3921Schristos mfhi ($25,$16,$8) 8776410c867Schristos daddu $3,$24 8786410c867Schristos sltu $1,$3,$24 879e0ea3921Schristos dmultu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1); 8806410c867Schristos daddu $25,$1 8816410c867Schristos daddu $7,$25 8826410c867Schristos sltu $2,$7,$25 883e0ea3921Schristos mflo ($24,$15,$9) 884e0ea3921Schristos mfhi ($25,$15,$9) 8856410c867Schristos daddu $3,$24 8866410c867Schristos sltu $1,$3,$24 887e0ea3921Schristos dmultu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1); 8886410c867Schristos daddu $25,$1 8896410c867Schristos daddu $7,$25 8906410c867Schristos sltu $1,$7,$25 8916410c867Schristos daddu $2,$1 892e0ea3921Schristos mflo ($24,$14,$10) 893e0ea3921Schristos mfhi ($25,$14,$10) 8946410c867Schristos daddu $3,$24 8956410c867Schristos sltu $1,$3,$24 896e0ea3921Schristos dmultu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1); 8976410c867Schristos daddu $25,$1 8986410c867Schristos daddu $7,$25 8996410c867Schristos sltu $1,$7,$25 9006410c867Schristos daddu $2,$1 901e0ea3921Schristos mflo ($24,$13,$11) 902e0ea3921Schristos mfhi ($25,$13,$11) 9036410c867Schristos daddu $3,$24 9046410c867Schristos sltu $1,$3,$24 905e0ea3921Schristos dmultu ($12,$17) # mul_add_c(a[0],b[4],c2,c3,c1); 9066410c867Schristos daddu $25,$1 9076410c867Schristos daddu $7,$25 9086410c867Schristos sltu $1,$7,$25 9096410c867Schristos daddu $2,$1 910e0ea3921Schristos mflo ($24,$12,$17) 911e0ea3921Schristos mfhi ($25,$12,$17) 9126410c867Schristos daddu $3,$24 9136410c867Schristos sltu $1,$3,$24 914e0ea3921Schristos dmultu ($12,$19) # mul_add_c(a[0],b[5],c3,c1,c2); 9156410c867Schristos daddu $25,$1 9166410c867Schristos daddu $7,$25 9176410c867Schristos sltu $1,$7,$25 9186410c867Schristos daddu $2,$1 9196410c867Schristos sd $3,4*8($4) # r[4]=c2; 9206410c867Schristos 921e0ea3921Schristos mflo 
($24,$12,$19) 922e0ea3921Schristos mfhi ($25,$12,$19) 9236410c867Schristos daddu $7,$24 9246410c867Schristos sltu $1,$7,$24 925e0ea3921Schristos dmultu ($13,$17) # mul_add_c(a[1],b[4],c3,c1,c2); 9266410c867Schristos daddu $25,$1 9276410c867Schristos daddu $2,$25 9286410c867Schristos sltu $3,$2,$25 929e0ea3921Schristos mflo ($24,$13,$17) 930e0ea3921Schristos mfhi ($25,$13,$17) 9316410c867Schristos daddu $7,$24 9326410c867Schristos sltu $1,$7,$24 933e0ea3921Schristos dmultu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2); 9346410c867Schristos daddu $25,$1 9356410c867Schristos daddu $2,$25 9366410c867Schristos sltu $1,$2,$25 9376410c867Schristos daddu $3,$1 938e0ea3921Schristos mflo ($24,$14,$11) 939e0ea3921Schristos mfhi ($25,$14,$11) 9406410c867Schristos daddu $7,$24 9416410c867Schristos sltu $1,$7,$24 942e0ea3921Schristos dmultu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2); 9436410c867Schristos daddu $25,$1 9446410c867Schristos daddu $2,$25 9456410c867Schristos sltu $1,$2,$25 9466410c867Schristos daddu $3,$1 947e0ea3921Schristos mflo ($24,$15,$10) 948e0ea3921Schristos mfhi ($25,$15,$10) 9496410c867Schristos daddu $7,$24 9506410c867Schristos sltu $1,$7,$24 951e0ea3921Schristos dmultu ($16,$9) # mul_add_c(a[4],b[1],c3,c1,c2); 9526410c867Schristos daddu $25,$1 9536410c867Schristos daddu $2,$25 9546410c867Schristos sltu $1,$2,$25 9556410c867Schristos daddu $3,$1 956e0ea3921Schristos mflo ($24,$16,$9) 957e0ea3921Schristos mfhi ($25,$16,$9) 9586410c867Schristos daddu $7,$24 9596410c867Schristos sltu $1,$7,$24 960e0ea3921Schristos dmultu ($18,$8) # mul_add_c(a[5],b[0],c3,c1,c2); 9616410c867Schristos daddu $25,$1 9626410c867Schristos daddu $2,$25 9636410c867Schristos sltu $1,$2,$25 9646410c867Schristos daddu $3,$1 965e0ea3921Schristos mflo ($24,$18,$8) 966e0ea3921Schristos mfhi ($25,$18,$8) 9676410c867Schristos daddu $7,$24 9686410c867Schristos sltu $1,$7,$24 969e0ea3921Schristos dmultu ($20,$8) # mul_add_c(a[6],b[0],c1,c2,c3); 9706410c867Schristos daddu $25,$1 9716410c867Schristos 
daddu $2,$25 9726410c867Schristos sltu $1,$2,$25 9736410c867Schristos daddu $3,$1 9746410c867Schristos sd $7,5*8($4) # r[5]=c3; 9756410c867Schristos 976e0ea3921Schristos mflo ($24,$20,$8) 977e0ea3921Schristos mfhi ($25,$20,$8) 9786410c867Schristos daddu $2,$24 9796410c867Schristos sltu $1,$2,$24 980e0ea3921Schristos dmultu ($18,$9) # mul_add_c(a[5],b[1],c1,c2,c3); 9816410c867Schristos daddu $25,$1 9826410c867Schristos daddu $3,$25 9836410c867Schristos sltu $7,$3,$25 984e0ea3921Schristos mflo ($24,$18,$9) 985e0ea3921Schristos mfhi ($25,$18,$9) 9866410c867Schristos daddu $2,$24 9876410c867Schristos sltu $1,$2,$24 988e0ea3921Schristos dmultu ($16,$10) # mul_add_c(a[4],b[2],c1,c2,c3); 9896410c867Schristos daddu $25,$1 9906410c867Schristos daddu $3,$25 9916410c867Schristos sltu $1,$3,$25 9926410c867Schristos daddu $7,$1 993e0ea3921Schristos mflo ($24,$16,$10) 994e0ea3921Schristos mfhi ($25,$16,$10) 9956410c867Schristos daddu $2,$24 9966410c867Schristos sltu $1,$2,$24 997e0ea3921Schristos dmultu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3); 9986410c867Schristos daddu $25,$1 9996410c867Schristos daddu $3,$25 10006410c867Schristos sltu $1,$3,$25 10016410c867Schristos daddu $7,$1 1002e0ea3921Schristos mflo ($24,$15,$11) 1003e0ea3921Schristos mfhi ($25,$15,$11) 10046410c867Schristos daddu $2,$24 10056410c867Schristos sltu $1,$2,$24 1006e0ea3921Schristos dmultu ($14,$17) # mul_add_c(a[2],b[4],c1,c2,c3); 10076410c867Schristos daddu $25,$1 10086410c867Schristos daddu $3,$25 10096410c867Schristos sltu $1,$3,$25 10106410c867Schristos daddu $7,$1 1011e0ea3921Schristos mflo ($24,$14,$17) 1012e0ea3921Schristos mfhi ($25,$14,$17) 10136410c867Schristos daddu $2,$24 10146410c867Schristos sltu $1,$2,$24 1015e0ea3921Schristos dmultu ($13,$19) # mul_add_c(a[1],b[5],c1,c2,c3); 10166410c867Schristos daddu $25,$1 10176410c867Schristos daddu $3,$25 10186410c867Schristos sltu $1,$3,$25 10196410c867Schristos daddu $7,$1 1020e0ea3921Schristos mflo ($24,$13,$19) 1021e0ea3921Schristos mfhi 
($25,$13,$19) 10226410c867Schristos daddu $2,$24 10236410c867Schristos sltu $1,$2,$24 1024e0ea3921Schristos dmultu ($12,$21) # mul_add_c(a[0],b[6],c1,c2,c3); 10256410c867Schristos daddu $25,$1 10266410c867Schristos daddu $3,$25 10276410c867Schristos sltu $1,$3,$25 10286410c867Schristos daddu $7,$1 1029e0ea3921Schristos mflo ($24,$12,$21) 1030e0ea3921Schristos mfhi ($25,$12,$21) 10316410c867Schristos daddu $2,$24 10326410c867Schristos sltu $1,$2,$24 1033e0ea3921Schristos dmultu ($12,$6) # mul_add_c(a[0],b[7],c2,c3,c1); 10346410c867Schristos daddu $25,$1 10356410c867Schristos daddu $3,$25 10366410c867Schristos sltu $1,$3,$25 10376410c867Schristos daddu $7,$1 10386410c867Schristos sd $2,6*8($4) # r[6]=c1; 10396410c867Schristos 1040e0ea3921Schristos mflo ($24,$12,$6) 1041e0ea3921Schristos mfhi ($25,$12,$6) 10426410c867Schristos daddu $3,$24 10436410c867Schristos sltu $1,$3,$24 1044e0ea3921Schristos dmultu ($13,$21) # mul_add_c(a[1],b[6],c2,c3,c1); 10456410c867Schristos daddu $25,$1 10466410c867Schristos daddu $7,$25 10476410c867Schristos sltu $2,$7,$25 1048e0ea3921Schristos mflo ($24,$13,$21) 1049e0ea3921Schristos mfhi ($25,$13,$21) 10506410c867Schristos daddu $3,$24 10516410c867Schristos sltu $1,$3,$24 1052e0ea3921Schristos dmultu ($14,$19) # mul_add_c(a[2],b[5],c2,c3,c1); 10536410c867Schristos daddu $25,$1 10546410c867Schristos daddu $7,$25 10556410c867Schristos sltu $1,$7,$25 10566410c867Schristos daddu $2,$1 1057e0ea3921Schristos mflo ($24,$14,$19) 1058e0ea3921Schristos mfhi ($25,$14,$19) 10596410c867Schristos daddu $3,$24 10606410c867Schristos sltu $1,$3,$24 1061e0ea3921Schristos dmultu ($15,$17) # mul_add_c(a[3],b[4],c2,c3,c1); 10626410c867Schristos daddu $25,$1 10636410c867Schristos daddu $7,$25 10646410c867Schristos sltu $1,$7,$25 10656410c867Schristos daddu $2,$1 1066e0ea3921Schristos mflo ($24,$15,$17) 1067e0ea3921Schristos mfhi ($25,$15,$17) 10686410c867Schristos daddu $3,$24 10696410c867Schristos sltu $1,$3,$24 1070e0ea3921Schristos dmultu ($16,$11) # 
mul_add_c(a[4],b[3],c2,c3,c1); 10716410c867Schristos daddu $25,$1 10726410c867Schristos daddu $7,$25 10736410c867Schristos sltu $1,$7,$25 10746410c867Schristos daddu $2,$1 1075e0ea3921Schristos mflo ($24,$16,$11) 1076e0ea3921Schristos mfhi ($25,$16,$11) 10776410c867Schristos daddu $3,$24 10786410c867Schristos sltu $1,$3,$24 1079e0ea3921Schristos dmultu ($18,$10) # mul_add_c(a[5],b[2],c2,c3,c1); 10806410c867Schristos daddu $25,$1 10816410c867Schristos daddu $7,$25 10826410c867Schristos sltu $1,$7,$25 10836410c867Schristos daddu $2,$1 1084e0ea3921Schristos mflo ($24,$18,$10) 1085e0ea3921Schristos mfhi ($25,$18,$10) 10866410c867Schristos daddu $3,$24 10876410c867Schristos sltu $1,$3,$24 1088e0ea3921Schristos dmultu ($20,$9) # mul_add_c(a[6],b[1],c2,c3,c1); 10896410c867Schristos daddu $25,$1 10906410c867Schristos daddu $7,$25 10916410c867Schristos sltu $1,$7,$25 10926410c867Schristos daddu $2,$1 1093e0ea3921Schristos mflo ($24,$20,$9) 1094e0ea3921Schristos mfhi ($25,$20,$9) 10956410c867Schristos daddu $3,$24 10966410c867Schristos sltu $1,$3,$24 1097e0ea3921Schristos dmultu ($5,$8) # mul_add_c(a[7],b[0],c2,c3,c1); 10986410c867Schristos daddu $25,$1 10996410c867Schristos daddu $7,$25 11006410c867Schristos sltu $1,$7,$25 11016410c867Schristos daddu $2,$1 1102e0ea3921Schristos mflo ($24,$5,$8) 1103e0ea3921Schristos mfhi ($25,$5,$8) 11046410c867Schristos daddu $3,$24 11056410c867Schristos sltu $1,$3,$24 1106e0ea3921Schristos dmultu ($5,$9) # mul_add_c(a[7],b[1],c3,c1,c2); 11076410c867Schristos daddu $25,$1 11086410c867Schristos daddu $7,$25 11096410c867Schristos sltu $1,$7,$25 11106410c867Schristos daddu $2,$1 11116410c867Schristos sd $3,7*8($4) # r[7]=c2; 11126410c867Schristos 1113e0ea3921Schristos mflo ($24,$5,$9) 1114e0ea3921Schristos mfhi ($25,$5,$9) 11156410c867Schristos daddu $7,$24 11166410c867Schristos sltu $1,$7,$24 1117e0ea3921Schristos dmultu ($20,$10) # mul_add_c(a[6],b[2],c3,c1,c2); 11186410c867Schristos daddu $25,$1 11196410c867Schristos daddu $2,$25 
11206410c867Schristos sltu $3,$2,$25 1121e0ea3921Schristos mflo ($24,$20,$10) 1122e0ea3921Schristos mfhi ($25,$20,$10) 11236410c867Schristos daddu $7,$24 11246410c867Schristos sltu $1,$7,$24 1125e0ea3921Schristos dmultu ($18,$11) # mul_add_c(a[5],b[3],c3,c1,c2); 11266410c867Schristos daddu $25,$1 11276410c867Schristos daddu $2,$25 11286410c867Schristos sltu $1,$2,$25 11296410c867Schristos daddu $3,$1 1130e0ea3921Schristos mflo ($24,$18,$11) 1131e0ea3921Schristos mfhi ($25,$18,$11) 11326410c867Schristos daddu $7,$24 11336410c867Schristos sltu $1,$7,$24 1134e0ea3921Schristos dmultu ($16,$17) # mul_add_c(a[4],b[4],c3,c1,c2); 11356410c867Schristos daddu $25,$1 11366410c867Schristos daddu $2,$25 11376410c867Schristos sltu $1,$2,$25 11386410c867Schristos daddu $3,$1 1139e0ea3921Schristos mflo ($24,$16,$17) 1140e0ea3921Schristos mfhi ($25,$16,$17) 11416410c867Schristos daddu $7,$24 11426410c867Schristos sltu $1,$7,$24 1143e0ea3921Schristos dmultu ($15,$19) # mul_add_c(a[3],b[5],c3,c1,c2); 11446410c867Schristos daddu $25,$1 11456410c867Schristos daddu $2,$25 11466410c867Schristos sltu $1,$2,$25 11476410c867Schristos daddu $3,$1 1148e0ea3921Schristos mflo ($24,$15,$19) 1149e0ea3921Schristos mfhi ($25,$15,$19) 11506410c867Schristos daddu $7,$24 11516410c867Schristos sltu $1,$7,$24 1152e0ea3921Schristos dmultu ($14,$21) # mul_add_c(a[2],b[6],c3,c1,c2); 11536410c867Schristos daddu $25,$1 11546410c867Schristos daddu $2,$25 11556410c867Schristos sltu $1,$2,$25 11566410c867Schristos daddu $3,$1 1157e0ea3921Schristos mflo ($24,$14,$21) 1158e0ea3921Schristos mfhi ($25,$14,$21) 11596410c867Schristos daddu $7,$24 11606410c867Schristos sltu $1,$7,$24 1161e0ea3921Schristos dmultu ($13,$6) # mul_add_c(a[1],b[7],c3,c1,c2); 11626410c867Schristos daddu $25,$1 11636410c867Schristos daddu $2,$25 11646410c867Schristos sltu $1,$2,$25 11656410c867Schristos daddu $3,$1 1166e0ea3921Schristos mflo ($24,$13,$6) 1167e0ea3921Schristos mfhi ($25,$13,$6) 11686410c867Schristos daddu $7,$24 
11696410c867Schristos sltu $1,$7,$24 1170e0ea3921Schristos dmultu ($14,$6) # mul_add_c(a[2],b[7],c1,c2,c3); 11716410c867Schristos daddu $25,$1 11726410c867Schristos daddu $2,$25 11736410c867Schristos sltu $1,$2,$25 11746410c867Schristos daddu $3,$1 11756410c867Schristos sd $7,8*8($4) # r[8]=c3; 11766410c867Schristos 1177e0ea3921Schristos mflo ($24,$14,$6) 1178e0ea3921Schristos mfhi ($25,$14,$6) 11796410c867Schristos daddu $2,$24 11806410c867Schristos sltu $1,$2,$24 1181e0ea3921Schristos dmultu ($15,$21) # mul_add_c(a[3],b[6],c1,c2,c3); 11826410c867Schristos daddu $25,$1 11836410c867Schristos daddu $3,$25 11846410c867Schristos sltu $7,$3,$25 1185e0ea3921Schristos mflo ($24,$15,$21) 1186e0ea3921Schristos mfhi ($25,$15,$21) 11876410c867Schristos daddu $2,$24 11886410c867Schristos sltu $1,$2,$24 1189e0ea3921Schristos dmultu ($16,$19) # mul_add_c(a[4],b[5],c1,c2,c3); 11906410c867Schristos daddu $25,$1 11916410c867Schristos daddu $3,$25 11926410c867Schristos sltu $1,$3,$25 11936410c867Schristos daddu $7,$1 1194e0ea3921Schristos mflo ($24,$16,$19) 1195e0ea3921Schristos mfhi ($25,$16,$19) 11966410c867Schristos daddu $2,$24 11976410c867Schristos sltu $1,$2,$24 1198e0ea3921Schristos dmultu ($18,$17) # mul_add_c(a[5],b[4],c1,c2,c3); 11996410c867Schristos daddu $25,$1 12006410c867Schristos daddu $3,$25 12016410c867Schristos sltu $1,$3,$25 12026410c867Schristos daddu $7,$1 1203e0ea3921Schristos mflo ($24,$18,$17) 1204e0ea3921Schristos mfhi ($25,$18,$17) 12056410c867Schristos daddu $2,$24 12066410c867Schristos sltu $1,$2,$24 1207e0ea3921Schristos dmultu ($20,$11) # mul_add_c(a[6],b[3],c1,c2,c3); 12086410c867Schristos daddu $25,$1 12096410c867Schristos daddu $3,$25 12106410c867Schristos sltu $1,$3,$25 12116410c867Schristos daddu $7,$1 1212e0ea3921Schristos mflo ($24,$20,$11) 1213e0ea3921Schristos mfhi ($25,$20,$11) 12146410c867Schristos daddu $2,$24 12156410c867Schristos sltu $1,$2,$24 1216e0ea3921Schristos dmultu ($5,$10) # mul_add_c(a[7],b[2],c1,c2,c3); 12176410c867Schristos 
daddu $25,$1 12186410c867Schristos daddu $3,$25 12196410c867Schristos sltu $1,$3,$25 12206410c867Schristos daddu $7,$1 1221e0ea3921Schristos mflo ($24,$5,$10) 1222e0ea3921Schristos mfhi ($25,$5,$10) 12236410c867Schristos daddu $2,$24 12246410c867Schristos sltu $1,$2,$24 1225e0ea3921Schristos dmultu ($5,$11) # mul_add_c(a[7],b[3],c2,c3,c1); 12266410c867Schristos daddu $25,$1 12276410c867Schristos daddu $3,$25 12286410c867Schristos sltu $1,$3,$25 12296410c867Schristos daddu $7,$1 12306410c867Schristos sd $2,9*8($4) # r[9]=c1; 12316410c867Schristos 1232e0ea3921Schristos mflo ($24,$5,$11) 1233e0ea3921Schristos mfhi ($25,$5,$11) 12346410c867Schristos daddu $3,$24 12356410c867Schristos sltu $1,$3,$24 1236e0ea3921Schristos dmultu ($20,$17) # mul_add_c(a[6],b[4],c2,c3,c1); 12376410c867Schristos daddu $25,$1 12386410c867Schristos daddu $7,$25 12396410c867Schristos sltu $2,$7,$25 1240e0ea3921Schristos mflo ($24,$20,$17) 1241e0ea3921Schristos mfhi ($25,$20,$17) 12426410c867Schristos daddu $3,$24 12436410c867Schristos sltu $1,$3,$24 1244e0ea3921Schristos dmultu ($18,$19) # mul_add_c(a[5],b[5],c2,c3,c1); 12456410c867Schristos daddu $25,$1 12466410c867Schristos daddu $7,$25 12476410c867Schristos sltu $1,$7,$25 12486410c867Schristos daddu $2,$1 1249e0ea3921Schristos mflo ($24,$18,$19) 1250e0ea3921Schristos mfhi ($25,$18,$19) 12516410c867Schristos daddu $3,$24 12526410c867Schristos sltu $1,$3,$24 1253e0ea3921Schristos dmultu ($16,$21) # mul_add_c(a[4],b[6],c2,c3,c1); 12546410c867Schristos daddu $25,$1 12556410c867Schristos daddu $7,$25 12566410c867Schristos sltu $1,$7,$25 12576410c867Schristos daddu $2,$1 1258e0ea3921Schristos mflo ($24,$16,$21) 1259e0ea3921Schristos mfhi ($25,$16,$21) 12606410c867Schristos daddu $3,$24 12616410c867Schristos sltu $1,$3,$24 1262e0ea3921Schristos dmultu ($15,$6) # mul_add_c(a[3],b[7],c2,c3,c1); 12636410c867Schristos daddu $25,$1 12646410c867Schristos daddu $7,$25 12656410c867Schristos sltu $1,$7,$25 12666410c867Schristos daddu $2,$1 
1267e0ea3921Schristos mflo ($24,$15,$6) 1268e0ea3921Schristos mfhi ($25,$15,$6) 12696410c867Schristos daddu $3,$24 12706410c867Schristos sltu $1,$3,$24 1271e0ea3921Schristos dmultu ($16,$6) # mul_add_c(a[4],b[7],c3,c1,c2); 12726410c867Schristos daddu $25,$1 12736410c867Schristos daddu $7,$25 12746410c867Schristos sltu $1,$7,$25 12756410c867Schristos daddu $2,$1 12766410c867Schristos sd $3,10*8($4) # r[10]=c2; 12776410c867Schristos 1278e0ea3921Schristos mflo ($24,$16,$6) 1279e0ea3921Schristos mfhi ($25,$16,$6) 12806410c867Schristos daddu $7,$24 12816410c867Schristos sltu $1,$7,$24 1282e0ea3921Schristos dmultu ($18,$21) # mul_add_c(a[5],b[6],c3,c1,c2); 12836410c867Schristos daddu $25,$1 12846410c867Schristos daddu $2,$25 12856410c867Schristos sltu $3,$2,$25 1286e0ea3921Schristos mflo ($24,$18,$21) 1287e0ea3921Schristos mfhi ($25,$18,$21) 12886410c867Schristos daddu $7,$24 12896410c867Schristos sltu $1,$7,$24 1290e0ea3921Schristos dmultu ($20,$19) # mul_add_c(a[6],b[5],c3,c1,c2); 12916410c867Schristos daddu $25,$1 12926410c867Schristos daddu $2,$25 12936410c867Schristos sltu $1,$2,$25 12946410c867Schristos daddu $3,$1 1295e0ea3921Schristos mflo ($24,$20,$19) 1296e0ea3921Schristos mfhi ($25,$20,$19) 12976410c867Schristos daddu $7,$24 12986410c867Schristos sltu $1,$7,$24 1299e0ea3921Schristos dmultu ($5,$17) # mul_add_c(a[7],b[4],c3,c1,c2); 13006410c867Schristos daddu $25,$1 13016410c867Schristos daddu $2,$25 13026410c867Schristos sltu $1,$2,$25 13036410c867Schristos daddu $3,$1 1304e0ea3921Schristos mflo ($24,$5,$17) 1305e0ea3921Schristos mfhi ($25,$5,$17) 13066410c867Schristos daddu $7,$24 13076410c867Schristos sltu $1,$7,$24 1308e0ea3921Schristos dmultu ($5,$19) # mul_add_c(a[7],b[5],c1,c2,c3); 13096410c867Schristos daddu $25,$1 13106410c867Schristos daddu $2,$25 13116410c867Schristos sltu $1,$2,$25 13126410c867Schristos daddu $3,$1 13136410c867Schristos sd $7,11*8($4) # r[11]=c3; 13146410c867Schristos 1315e0ea3921Schristos mflo ($24,$5,$19) 1316e0ea3921Schristos 
mfhi ($25,$5,$19) 13176410c867Schristos daddu $2,$24 13186410c867Schristos sltu $1,$2,$24 1319e0ea3921Schristos dmultu ($20,$21) # mul_add_c(a[6],b[6],c1,c2,c3); 13206410c867Schristos daddu $25,$1 13216410c867Schristos daddu $3,$25 13226410c867Schristos sltu $7,$3,$25 1323e0ea3921Schristos mflo ($24,$20,$21) 1324e0ea3921Schristos mfhi ($25,$20,$21) 13256410c867Schristos daddu $2,$24 13266410c867Schristos sltu $1,$2,$24 1327e0ea3921Schristos dmultu ($18,$6) # mul_add_c(a[5],b[7],c1,c2,c3); 13286410c867Schristos daddu $25,$1 13296410c867Schristos daddu $3,$25 13306410c867Schristos sltu $1,$3,$25 13316410c867Schristos daddu $7,$1 1332e0ea3921Schristos mflo ($24,$18,$6) 1333e0ea3921Schristos mfhi ($25,$18,$6) 13346410c867Schristos daddu $2,$24 13356410c867Schristos sltu $1,$2,$24 1336e0ea3921Schristos dmultu ($20,$6) # mul_add_c(a[6],b[7],c2,c3,c1); 13376410c867Schristos daddu $25,$1 13386410c867Schristos daddu $3,$25 13396410c867Schristos sltu $1,$3,$25 13406410c867Schristos daddu $7,$1 13416410c867Schristos sd $2,12*8($4) # r[12]=c1; 13426410c867Schristos 1343e0ea3921Schristos mflo ($24,$20,$6) 1344e0ea3921Schristos mfhi ($25,$20,$6) 13456410c867Schristos daddu $3,$24 13466410c867Schristos sltu $1,$3,$24 1347e0ea3921Schristos dmultu ($5,$21) # mul_add_c(a[7],b[6],c2,c3,c1); 13486410c867Schristos daddu $25,$1 13496410c867Schristos daddu $7,$25 13506410c867Schristos sltu $2,$7,$25 1351e0ea3921Schristos mflo ($24,$5,$21) 1352e0ea3921Schristos mfhi ($25,$5,$21) 13536410c867Schristos daddu $3,$24 13546410c867Schristos sltu $1,$3,$24 1355e0ea3921Schristos dmultu ($5,$6) # mul_add_c(a[7],b[7],c3,c1,c2); 13566410c867Schristos daddu $25,$1 13576410c867Schristos daddu $7,$25 13586410c867Schristos sltu $1,$7,$25 13596410c867Schristos daddu $2,$1 13606410c867Schristos sd $3,13*8($4) # r[13]=c2; 13616410c867Schristos 1362e0ea3921Schristos mflo ($24,$5,$6) 1363e0ea3921Schristos mfhi ($25,$5,$6) 13646410c867Schristos daddu $7,$24 13656410c867Schristos sltu $1,$7,$24 
13666410c867Schristos daddu $25,$1 13676410c867Schristos daddu $2,$25 13686410c867Schristos sd $7,14*8($4) # r[14]=c3; 13696410c867Schristos sd $2,15*8($4) # r[15]=c1; 13706410c867Schristos 13716410c867Schristos .set noreorder 13726410c867Schristos ld $21,5*8($29) 13736410c867Schristos ld $20,4*8($29) 13746410c867Schristos ld $19,3*8($29) 13756410c867Schristos ld $18,2*8($29) 13766410c867Schristos ld $17,1*8($29) 13776410c867Schristos ld $16,0*8($29) 13786410c867Schristos jr $31 13796410c867Schristos daddu $29,6*8 13806410c867Schristos.end bn_mul_comba8 13816410c867Schristos /* bn_mul_comba4: multiplies the four doublewords loaded from 0..3*8($5) (a[0..3], kept in $12-$15) by the four loaded from 0..3*8($6) (b[0..3], kept in $8-$11) and stores the eight result doublewords to 0..7*8($4). Partial products are summed one result column at a time; $2, $3 and $7 rotate through the c1, c2, c3 accumulator roles named in the mul_add_c annotations, $24 and $25 receive the low and high halves of each product, and $1 holds the carry bit produced by sltu. Each dmultu is issued ahead of the instructions that finish accumulating the previous product, so the mul_add_c annotation on a dmultu describes the mflo/mfhi/daddu group that follows later. The parenthesised dmultu, mflo and mfhi forms are presumably preprocessor macros from mips_arch.h (not visible in this chunk) - confirm there. NOTE(review): $4, $5, $6 look like the r, a, b pointer arguments - verify against the C prototype. */ 13826410c867Schristos.align 5 13836410c867Schristos.globl bn_mul_comba4 13846410c867Schristos.ent bn_mul_comba4 13856410c867Schristosbn_mul_comba4: 13866410c867Schristos .set reorder 13876410c867Schristos ld $12,0($5) 13886410c867Schristos ld $8,0($6) 13896410c867Schristos ld $13,8($5) 13906410c867Schristos ld $14,2*8($5) 1391e0ea3921Schristos dmultu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3); 13926410c867Schristos ld $15,3*8($5) 13936410c867Schristos ld $9,8($6) 13946410c867Schristos ld $10,2*8($6) 13956410c867Schristos ld $11,3*8($6) 1396e0ea3921Schristos mflo ($2,$12,$8) 1397e0ea3921Schristos mfhi ($3,$12,$8) 13986410c867Schristos sd $2,0($4) /* r[0]=c1 */ 13996410c867Schristos 1400e0ea3921Schristos dmultu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1); 1401e0ea3921Schristos mflo ($24,$12,$9) 1402e0ea3921Schristos mfhi ($25,$12,$9) 14036410c867Schristos daddu $3,$24 14046410c867Schristos sltu $1,$3,$24 1405e0ea3921Schristos dmultu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1); 14066410c867Schristos daddu $7,$25,$1 /* three-operand form: $7 is written as high half plus carry, its previous contents are not read */ 1407e0ea3921Schristos mflo ($24,$13,$8) 1408e0ea3921Schristos mfhi ($25,$13,$8) 14096410c867Schristos daddu $3,$24 14106410c867Schristos sltu $1,$3,$24 1411e0ea3921Schristos dmultu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2); 14126410c867Schristos daddu $25,$1 14136410c867Schristos daddu $7,$25 14146410c867Schristos sltu $2,$7,$25 14156410c867Schristos sd $3,8($4) /* r[1]=c2 */ 14166410c867Schristos 1417e0ea3921Schristos 
mflo ($24,$14,$8) 1418e0ea3921Schristos mfhi ($25,$14,$8) 14196410c867Schristos daddu $7,$24 14206410c867Schristos sltu $1,$7,$24 1421e0ea3921Schristos dmultu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2); 14226410c867Schristos daddu $25,$1 14236410c867Schristos daddu $2,$25 1424e0ea3921Schristos mflo ($24,$13,$9) 1425e0ea3921Schristos mfhi ($25,$13,$9) 14266410c867Schristos daddu $7,$24 14276410c867Schristos sltu $1,$7,$24 1428e0ea3921Schristos dmultu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2); 14296410c867Schristos daddu $25,$1 14306410c867Schristos daddu $2,$25 14316410c867Schristos sltu $3,$2,$25 1432e0ea3921Schristos mflo ($24,$12,$10) 1433e0ea3921Schristos mfhi ($25,$12,$10) 14346410c867Schristos daddu $7,$24 14356410c867Schristos sltu $1,$7,$24 1436e0ea3921Schristos dmultu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3); 14376410c867Schristos daddu $25,$1 14386410c867Schristos daddu $2,$25 14396410c867Schristos sltu $1,$2,$25 14406410c867Schristos daddu $3,$1 14416410c867Schristos sd $7,2*8($4) /* r[2]=c3 */ 14426410c867Schristos 1443e0ea3921Schristos mflo ($24,$12,$11) 1444e0ea3921Schristos mfhi ($25,$12,$11) 14456410c867Schristos daddu $2,$24 14466410c867Schristos sltu $1,$2,$24 1447e0ea3921Schristos dmultu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3); 14486410c867Schristos daddu $25,$1 14496410c867Schristos daddu $3,$25 14506410c867Schristos sltu $7,$3,$25 1451e0ea3921Schristos mflo ($24,$13,$10) 1452e0ea3921Schristos mfhi ($25,$13,$10) 14536410c867Schristos daddu $2,$24 14546410c867Schristos sltu $1,$2,$24 1455e0ea3921Schristos dmultu ($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3); 14566410c867Schristos daddu $25,$1 14576410c867Schristos daddu $3,$25 14586410c867Schristos sltu $1,$3,$25 14596410c867Schristos daddu $7,$1 1460e0ea3921Schristos mflo ($24,$14,$9) 1461e0ea3921Schristos mfhi ($25,$14,$9) 14626410c867Schristos daddu $2,$24 14636410c867Schristos sltu $1,$2,$24 1464e0ea3921Schristos dmultu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3); 14656410c867Schristos daddu $25,$1 
14666410c867Schristos daddu $3,$25 14676410c867Schristos sltu $1,$3,$25 14686410c867Schristos daddu $7,$1 1469e0ea3921Schristos mflo ($24,$15,$8) 1470e0ea3921Schristos mfhi ($25,$15,$8) 14716410c867Schristos daddu $2,$24 14726410c867Schristos sltu $1,$2,$24 1473e0ea3921Schristos dmultu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1); 14746410c867Schristos daddu $25,$1 14756410c867Schristos daddu $3,$25 14766410c867Schristos sltu $1,$3,$25 14776410c867Schristos daddu $7,$1 14786410c867Schristos sd $2,3*8($4) /* r[3]=c1 */ 14796410c867Schristos 1480e0ea3921Schristos mflo ($24,$15,$9) 1481e0ea3921Schristos mfhi ($25,$15,$9) 14826410c867Schristos daddu $3,$24 14836410c867Schristos sltu $1,$3,$24 1484e0ea3921Schristos dmultu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1); 14856410c867Schristos daddu $25,$1 14866410c867Schristos daddu $7,$25 14876410c867Schristos sltu $2,$7,$25 1488e0ea3921Schristos mflo ($24,$14,$10) 1489e0ea3921Schristos mfhi ($25,$14,$10) 14906410c867Schristos daddu $3,$24 14916410c867Schristos sltu $1,$3,$24 1492e0ea3921Schristos dmultu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1); 14936410c867Schristos daddu $25,$1 14946410c867Schristos daddu $7,$25 14956410c867Schristos sltu $1,$7,$25 14966410c867Schristos daddu $2,$1 1497e0ea3921Schristos mflo ($24,$13,$11) 1498e0ea3921Schristos mfhi ($25,$13,$11) 14996410c867Schristos daddu $3,$24 15006410c867Schristos sltu $1,$3,$24 1501e0ea3921Schristos dmultu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2); 15026410c867Schristos daddu $25,$1 15036410c867Schristos daddu $7,$25 15046410c867Schristos sltu $1,$7,$25 15056410c867Schristos daddu $2,$1 15066410c867Schristos sd $3,4*8($4) /* r[4]=c2 */ 15076410c867Schristos 1508e0ea3921Schristos mflo ($24,$14,$11) 1509e0ea3921Schristos mfhi ($25,$14,$11) 15106410c867Schristos daddu $7,$24 15116410c867Schristos sltu $1,$7,$24 1512e0ea3921Schristos dmultu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2); 15136410c867Schristos daddu $25,$1 15146410c867Schristos daddu $2,$25 15156410c867Schristos sltu $3,$2,$25 
1516e0ea3921Schristos mflo ($24,$15,$10) 1517e0ea3921Schristos mfhi ($25,$15,$10) 15186410c867Schristos daddu $7,$24 15196410c867Schristos sltu $1,$7,$24 1520e0ea3921Schristos dmultu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3); 15216410c867Schristos daddu $25,$1 15226410c867Schristos daddu $2,$25 15236410c867Schristos sltu $1,$2,$25 15246410c867Schristos daddu $3,$1 15256410c867Schristos sd $7,5*8($4) /* r[5]=c3 */ 15266410c867Schristos 1527e0ea3921Schristos mflo ($24,$15,$11) 1528e0ea3921Schristos mfhi ($25,$15,$11) 15296410c867Schristos daddu $2,$24 15306410c867Schristos sltu $1,$2,$24 15316410c867Schristos daddu $25,$1 15326410c867Schristos daddu $3,$25 15336410c867Schristos sd $2,6*8($4) /* r[6]=c1 */ 15346410c867Schristos sd $3,7*8($4) /* r[7]=c2, the top word; no further carry is computed after it */ 15356410c867Schristos 15366410c867Schristos .set noreorder 15376410c867Schristos jr $31 15386410c867Schristos nop /* return through $31; with reordering switched off the nop is placed explicitly right after the jump */ 15396410c867Schristos.end bn_mul_comba4 15406410c867Schristos 15416410c867Schristos.align 5 15426410c867Schristos.globl bn_sqr_comba8 15436410c867Schristos.ent bn_sqr_comba8 15446410c867Schristosbn_sqr_comba8: 15456410c867Schristos .set reorder 15466410c867Schristos ld $12,0($5) 15476410c867Schristos ld $13,8($5) 15486410c867Schristos ld $14,2*8($5) 15496410c867Schristos ld $15,3*8($5) 15506410c867Schristos 1551e0ea3921Schristos dmultu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3); 15526410c867Schristos ld $8,4*8($5) 15536410c867Schristos ld $9,5*8($5) 15546410c867Schristos ld $10,6*8($5) 15556410c867Schristos ld $11,7*8($5) 1556e0ea3921Schristos mflo ($2,$12,$12) 1557e0ea3921Schristos mfhi ($3,$12,$12) 15586410c867Schristos sd $2,0($4) 15596410c867Schristos 1560e0ea3921Schristos dmultu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1); 1561e0ea3921Schristos mflo ($24,$12,$13) 1562e0ea3921Schristos mfhi ($25,$12,$13) 15636410c867Schristos slt $2,$25,$0 15646410c867Schristos dsll $25,1 1565e0ea3921Schristos dmultu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2); 15666410c867Schristos slt $6,$24,$0 15676410c867Schristos daddu $25,$6 15686410c867Schristos dsll 
$24,1 15696410c867Schristos daddu $3,$24 15706410c867Schristos sltu $1,$3,$24 15716410c867Schristos daddu $7,$25,$1 15726410c867Schristos sd $3,8($4) 1573*1b3d6f93Schristos sltu $1,$7,$25 1574*1b3d6f93Schristos daddu $2,$1 1575e0ea3921Schristos mflo ($24,$14,$12) 1576e0ea3921Schristos mfhi ($25,$14,$12) 15776410c867Schristos daddu $7,$24 15786410c867Schristos sltu $1,$7,$24 1579e0ea3921Schristos dmultu ($13,$13) # forward multiplication 15806410c867Schristos daddu $7,$24 15816410c867Schristos daddu $1,$25 15826410c867Schristos sltu $24,$7,$24 15836410c867Schristos daddu $2,$1 15846410c867Schristos daddu $25,$24 15856410c867Schristos sltu $3,$2,$1 15866410c867Schristos daddu $2,$25 15876410c867Schristos sltu $25,$2,$25 15886410c867Schristos daddu $3,$25 1589e0ea3921Schristos mflo ($24,$13,$13) 1590e0ea3921Schristos mfhi ($25,$13,$13) 15916410c867Schristos daddu $7,$24 15926410c867Schristos sltu $1,$7,$24 1593e0ea3921Schristos dmultu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3); 15946410c867Schristos daddu $25,$1 15956410c867Schristos daddu $2,$25 15966410c867Schristos sltu $1,$2,$25 15976410c867Schristos daddu $3,$1 15986410c867Schristos sd $7,2*8($4) 1599e0ea3921Schristos mflo ($24,$12,$15) 1600e0ea3921Schristos mfhi ($25,$12,$15) 16016410c867Schristos daddu $2,$24 16026410c867Schristos sltu $1,$2,$24 1603e0ea3921Schristos dmultu ($13,$14) # forward multiplication 16046410c867Schristos daddu $2,$24 16056410c867Schristos daddu $1,$25 16066410c867Schristos sltu $24,$2,$24 16076410c867Schristos daddu $3,$1 16086410c867Schristos daddu $25,$24 16096410c867Schristos sltu $7,$3,$1 16106410c867Schristos daddu $3,$25 16116410c867Schristos sltu $25,$3,$25 16126410c867Schristos daddu $7,$25 1613e0ea3921Schristos mflo ($24,$13,$14) 1614e0ea3921Schristos mfhi ($25,$13,$14) 16156410c867Schristos daddu $2,$24 16166410c867Schristos sltu $1,$2,$24 1617e0ea3921Schristos dmultu ($8,$12) # forward multiplication 16186410c867Schristos daddu $2,$24 16196410c867Schristos daddu $1,$25 
16206410c867Schristos sltu $24,$2,$24 16216410c867Schristos daddu $3,$1 16226410c867Schristos daddu $25,$24 16236410c867Schristos sltu $1,$3,$1 16246410c867Schristos daddu $3,$25 16256410c867Schristos daddu $7,$1 16266410c867Schristos sltu $25,$3,$25 16276410c867Schristos daddu $7,$25 1628e0ea3921Schristos mflo ($24,$8,$12) 1629e0ea3921Schristos mfhi ($25,$8,$12) 16306410c867Schristos sd $2,3*8($4) 16316410c867Schristos daddu $3,$24 16326410c867Schristos sltu $1,$3,$24 1633e0ea3921Schristos dmultu ($15,$13) # forward multiplication 16346410c867Schristos daddu $3,$24 16356410c867Schristos daddu $1,$25 16366410c867Schristos sltu $24,$3,$24 16376410c867Schristos daddu $7,$1 16386410c867Schristos daddu $25,$24 16396410c867Schristos sltu $2,$7,$1 16406410c867Schristos daddu $7,$25 16416410c867Schristos sltu $25,$7,$25 16426410c867Schristos daddu $2,$25 1643e0ea3921Schristos mflo ($24,$15,$13) 1644e0ea3921Schristos mfhi ($25,$15,$13) 16456410c867Schristos daddu $3,$24 16466410c867Schristos sltu $1,$3,$24 1647e0ea3921Schristos dmultu ($14,$14) # forward multiplication 16486410c867Schristos daddu $3,$24 16496410c867Schristos daddu $1,$25 16506410c867Schristos sltu $24,$3,$24 16516410c867Schristos daddu $7,$1 16526410c867Schristos daddu $25,$24 16536410c867Schristos sltu $1,$7,$1 16546410c867Schristos daddu $7,$25 16556410c867Schristos daddu $2,$1 16566410c867Schristos sltu $25,$7,$25 16576410c867Schristos daddu $2,$25 1658e0ea3921Schristos mflo ($24,$14,$14) 1659e0ea3921Schristos mfhi ($25,$14,$14) 16606410c867Schristos daddu $3,$24 16616410c867Schristos sltu $1,$3,$24 1662e0ea3921Schristos dmultu ($12,$9) # mul_add_c2(a[0],b[5],c3,c1,c2); 16636410c867Schristos daddu $25,$1 16646410c867Schristos daddu $7,$25 16656410c867Schristos sltu $1,$7,$25 16666410c867Schristos daddu $2,$1 16676410c867Schristos sd $3,4*8($4) 1668e0ea3921Schristos mflo ($24,$12,$9) 1669e0ea3921Schristos mfhi ($25,$12,$9) 16706410c867Schristos daddu $7,$24 16716410c867Schristos sltu $1,$7,$24 
1672e0ea3921Schristos dmultu ($13,$8) # forward multiplication 16736410c867Schristos daddu $7,$24 16746410c867Schristos daddu $1,$25 16756410c867Schristos sltu $24,$7,$24 16766410c867Schristos daddu $2,$1 16776410c867Schristos daddu $25,$24 16786410c867Schristos sltu $3,$2,$1 16796410c867Schristos daddu $2,$25 16806410c867Schristos sltu $25,$2,$25 16816410c867Schristos daddu $3,$25 1682e0ea3921Schristos mflo ($24,$13,$8) 1683e0ea3921Schristos mfhi ($25,$13,$8) 16846410c867Schristos daddu $7,$24 16856410c867Schristos sltu $1,$7,$24 1686e0ea3921Schristos dmultu ($14,$15) # forward multiplication 16876410c867Schristos daddu $7,$24 16886410c867Schristos daddu $1,$25 16896410c867Schristos sltu $24,$7,$24 16906410c867Schristos daddu $2,$1 16916410c867Schristos daddu $25,$24 16926410c867Schristos sltu $1,$2,$1 16936410c867Schristos daddu $2,$25 16946410c867Schristos daddu $3,$1 16956410c867Schristos sltu $25,$2,$25 16966410c867Schristos daddu $3,$25 1697e0ea3921Schristos mflo ($24,$14,$15) 1698e0ea3921Schristos mfhi ($25,$14,$15) 16996410c867Schristos daddu $7,$24 17006410c867Schristos sltu $1,$7,$24 1701e0ea3921Schristos dmultu ($10,$12) # forward multiplication 17026410c867Schristos daddu $7,$24 17036410c867Schristos daddu $1,$25 17046410c867Schristos sltu $24,$7,$24 17056410c867Schristos daddu $2,$1 17066410c867Schristos daddu $25,$24 17076410c867Schristos sltu $1,$2,$1 17086410c867Schristos daddu $2,$25 17096410c867Schristos daddu $3,$1 17106410c867Schristos sltu $25,$2,$25 17116410c867Schristos daddu $3,$25 1712e0ea3921Schristos mflo ($24,$10,$12) 1713e0ea3921Schristos mfhi ($25,$10,$12) 17146410c867Schristos sd $7,5*8($4) 17156410c867Schristos daddu $2,$24 17166410c867Schristos sltu $1,$2,$24 1717e0ea3921Schristos dmultu ($9,$13) # forward multiplication 17186410c867Schristos daddu $2,$24 17196410c867Schristos daddu $1,$25 17206410c867Schristos sltu $24,$2,$24 17216410c867Schristos daddu $3,$1 17226410c867Schristos daddu $25,$24 17236410c867Schristos sltu $7,$3,$1 
17246410c867Schristos daddu $3,$25 17256410c867Schristos sltu $25,$3,$25 17266410c867Schristos daddu $7,$25 1727e0ea3921Schristos mflo ($24,$9,$13) 1728e0ea3921Schristos mfhi ($25,$9,$13) 17296410c867Schristos daddu $2,$24 17306410c867Schristos sltu $1,$2,$24 1731e0ea3921Schristos dmultu ($8,$14) # forward multiplication 17326410c867Schristos daddu $2,$24 17336410c867Schristos daddu $1,$25 17346410c867Schristos sltu $24,$2,$24 17356410c867Schristos daddu $3,$1 17366410c867Schristos daddu $25,$24 17376410c867Schristos sltu $1,$3,$1 17386410c867Schristos daddu $3,$25 17396410c867Schristos daddu $7,$1 17406410c867Schristos sltu $25,$3,$25 17416410c867Schristos daddu $7,$25 1742e0ea3921Schristos mflo ($24,$8,$14) 1743e0ea3921Schristos mfhi ($25,$8,$14) 17446410c867Schristos daddu $2,$24 17456410c867Schristos sltu $1,$2,$24 1746e0ea3921Schristos dmultu ($15,$15) # forward multiplication 17476410c867Schristos daddu $2,$24 17486410c867Schristos daddu $1,$25 17496410c867Schristos sltu $24,$2,$24 17506410c867Schristos daddu $3,$1 17516410c867Schristos daddu $25,$24 17526410c867Schristos sltu $1,$3,$1 17536410c867Schristos daddu $3,$25 17546410c867Schristos daddu $7,$1 17556410c867Schristos sltu $25,$3,$25 17566410c867Schristos daddu $7,$25 1757e0ea3921Schristos mflo ($24,$15,$15) 1758e0ea3921Schristos mfhi ($25,$15,$15) 17596410c867Schristos daddu $2,$24 17606410c867Schristos sltu $1,$2,$24 1761e0ea3921Schristos dmultu ($12,$11) # mul_add_c2(a[0],b[7],c2,c3,c1); 17626410c867Schristos daddu $25,$1 17636410c867Schristos daddu $3,$25 17646410c867Schristos sltu $1,$3,$25 17656410c867Schristos daddu $7,$1 17666410c867Schristos sd $2,6*8($4) 1767e0ea3921Schristos mflo ($24,$12,$11) 1768e0ea3921Schristos mfhi ($25,$12,$11) 17696410c867Schristos daddu $3,$24 17706410c867Schristos sltu $1,$3,$24 1771e0ea3921Schristos dmultu ($13,$10) # forward multiplication 17726410c867Schristos daddu $3,$24 17736410c867Schristos daddu $1,$25 17746410c867Schristos sltu $24,$3,$24 
17756410c867Schristos daddu $7,$1 17766410c867Schristos daddu $25,$24 17776410c867Schristos sltu $2,$7,$1 17786410c867Schristos daddu $7,$25 17796410c867Schristos sltu $25,$7,$25 17806410c867Schristos daddu $2,$25 1781e0ea3921Schristos mflo ($24,$13,$10) 1782e0ea3921Schristos mfhi ($25,$13,$10) 17836410c867Schristos daddu $3,$24 17846410c867Schristos sltu $1,$3,$24 1785e0ea3921Schristos dmultu ($14,$9) # forward multiplication 17866410c867Schristos daddu $3,$24 17876410c867Schristos daddu $1,$25 17886410c867Schristos sltu $24,$3,$24 17896410c867Schristos daddu $7,$1 17906410c867Schristos daddu $25,$24 17916410c867Schristos sltu $1,$7,$1 17926410c867Schristos daddu $7,$25 17936410c867Schristos daddu $2,$1 17946410c867Schristos sltu $25,$7,$25 17956410c867Schristos daddu $2,$25 1796e0ea3921Schristos mflo ($24,$14,$9) 1797e0ea3921Schristos mfhi ($25,$14,$9) 17986410c867Schristos daddu $3,$24 17996410c867Schristos sltu $1,$3,$24 1800e0ea3921Schristos dmultu ($15,$8) # forward multiplication 18016410c867Schristos daddu $3,$24 18026410c867Schristos daddu $1,$25 18036410c867Schristos sltu $24,$3,$24 18046410c867Schristos daddu $7,$1 18056410c867Schristos daddu $25,$24 18066410c867Schristos sltu $1,$7,$1 18076410c867Schristos daddu $7,$25 18086410c867Schristos daddu $2,$1 18096410c867Schristos sltu $25,$7,$25 18106410c867Schristos daddu $2,$25 1811e0ea3921Schristos mflo ($24,$15,$8) 1812e0ea3921Schristos mfhi ($25,$15,$8) 18136410c867Schristos daddu $3,$24 18146410c867Schristos sltu $1,$3,$24 1815e0ea3921Schristos dmultu ($11,$13) # forward multiplication 18166410c867Schristos daddu $3,$24 18176410c867Schristos daddu $1,$25 18186410c867Schristos sltu $24,$3,$24 18196410c867Schristos daddu $7,$1 18206410c867Schristos daddu $25,$24 18216410c867Schristos sltu $1,$7,$1 18226410c867Schristos daddu $7,$25 18236410c867Schristos daddu $2,$1 18246410c867Schristos sltu $25,$7,$25 18256410c867Schristos daddu $2,$25 1826e0ea3921Schristos mflo ($24,$11,$13) 1827e0ea3921Schristos mfhi 
($25,$11,$13) 18286410c867Schristos sd $3,7*8($4) 18296410c867Schristos daddu $7,$24 18306410c867Schristos sltu $1,$7,$24 1831e0ea3921Schristos dmultu ($10,$14) # forward multiplication 18326410c867Schristos daddu $7,$24 18336410c867Schristos daddu $1,$25 18346410c867Schristos sltu $24,$7,$24 18356410c867Schristos daddu $2,$1 18366410c867Schristos daddu $25,$24 18376410c867Schristos sltu $3,$2,$1 18386410c867Schristos daddu $2,$25 18396410c867Schristos sltu $25,$2,$25 18406410c867Schristos daddu $3,$25 1841e0ea3921Schristos mflo ($24,$10,$14) 1842e0ea3921Schristos mfhi ($25,$10,$14) 18436410c867Schristos daddu $7,$24 18446410c867Schristos sltu $1,$7,$24 1845e0ea3921Schristos dmultu ($9,$15) # forward multiplication 18466410c867Schristos daddu $7,$24 18476410c867Schristos daddu $1,$25 18486410c867Schristos sltu $24,$7,$24 18496410c867Schristos daddu $2,$1 18506410c867Schristos daddu $25,$24 18516410c867Schristos sltu $1,$2,$1 18526410c867Schristos daddu $2,$25 18536410c867Schristos daddu $3,$1 18546410c867Schristos sltu $25,$2,$25 18556410c867Schristos daddu $3,$25 1856e0ea3921Schristos mflo ($24,$9,$15) 1857e0ea3921Schristos mfhi ($25,$9,$15) 18586410c867Schristos daddu $7,$24 18596410c867Schristos sltu $1,$7,$24 1860e0ea3921Schristos dmultu ($8,$8) # forward multiplication 18616410c867Schristos daddu $7,$24 18626410c867Schristos daddu $1,$25 18636410c867Schristos sltu $24,$7,$24 18646410c867Schristos daddu $2,$1 18656410c867Schristos daddu $25,$24 18666410c867Schristos sltu $1,$2,$1 18676410c867Schristos daddu $2,$25 18686410c867Schristos daddu $3,$1 18696410c867Schristos sltu $25,$2,$25 18706410c867Schristos daddu $3,$25 1871e0ea3921Schristos mflo ($24,$8,$8) 1872e0ea3921Schristos mfhi ($25,$8,$8) 18736410c867Schristos daddu $7,$24 18746410c867Schristos sltu $1,$7,$24 1875e0ea3921Schristos dmultu ($14,$11) # mul_add_c2(a[2],b[7],c1,c2,c3); 18766410c867Schristos daddu $25,$1 18776410c867Schristos daddu $2,$25 18786410c867Schristos sltu $1,$2,$25 
18796410c867Schristos daddu $3,$1 18806410c867Schristos sd $7,8*8($4) 1881e0ea3921Schristos mflo ($24,$14,$11) 1882e0ea3921Schristos mfhi ($25,$14,$11) 18836410c867Schristos daddu $2,$24 18846410c867Schristos sltu $1,$2,$24 1885e0ea3921Schristos dmultu ($15,$10) # forward multiplication 18866410c867Schristos daddu $2,$24 18876410c867Schristos daddu $1,$25 18886410c867Schristos sltu $24,$2,$24 18896410c867Schristos daddu $3,$1 18906410c867Schristos daddu $25,$24 18916410c867Schristos sltu $7,$3,$1 18926410c867Schristos daddu $3,$25 18936410c867Schristos sltu $25,$3,$25 18946410c867Schristos daddu $7,$25 1895e0ea3921Schristos mflo ($24,$15,$10) 1896e0ea3921Schristos mfhi ($25,$15,$10) 18976410c867Schristos daddu $2,$24 18986410c867Schristos sltu $1,$2,$24 1899e0ea3921Schristos dmultu ($8,$9) # forward multiplication 19006410c867Schristos daddu $2,$24 19016410c867Schristos daddu $1,$25 19026410c867Schristos sltu $24,$2,$24 19036410c867Schristos daddu $3,$1 19046410c867Schristos daddu $25,$24 19056410c867Schristos sltu $1,$3,$1 19066410c867Schristos daddu $3,$25 19076410c867Schristos daddu $7,$1 19086410c867Schristos sltu $25,$3,$25 19096410c867Schristos daddu $7,$25 1910e0ea3921Schristos mflo ($24,$8,$9) 1911e0ea3921Schristos mfhi ($25,$8,$9) 19126410c867Schristos daddu $2,$24 19136410c867Schristos sltu $1,$2,$24 1914e0ea3921Schristos dmultu ($11,$15) # forward multiplication 19156410c867Schristos daddu $2,$24 19166410c867Schristos daddu $1,$25 19176410c867Schristos sltu $24,$2,$24 19186410c867Schristos daddu $3,$1 19196410c867Schristos daddu $25,$24 19206410c867Schristos sltu $1,$3,$1 19216410c867Schristos daddu $3,$25 19226410c867Schristos daddu $7,$1 19236410c867Schristos sltu $25,$3,$25 19246410c867Schristos daddu $7,$25 1925e0ea3921Schristos mflo ($24,$11,$15) 1926e0ea3921Schristos mfhi ($25,$11,$15) 19276410c867Schristos sd $2,9*8($4) 19286410c867Schristos daddu $3,$24 19296410c867Schristos sltu $1,$3,$24 1930e0ea3921Schristos dmultu ($10,$8) # forward 
multiplication 19316410c867Schristos daddu $3,$24 19326410c867Schristos daddu $1,$25 19336410c867Schristos sltu $24,$3,$24 19346410c867Schristos daddu $7,$1 19356410c867Schristos daddu $25,$24 19366410c867Schristos sltu $2,$7,$1 19376410c867Schristos daddu $7,$25 19386410c867Schristos sltu $25,$7,$25 19396410c867Schristos daddu $2,$25 1940e0ea3921Schristos mflo ($24,$10,$8) 1941e0ea3921Schristos mfhi ($25,$10,$8) 19426410c867Schristos daddu $3,$24 19436410c867Schristos sltu $1,$3,$24 1944e0ea3921Schristos dmultu ($9,$9) # forward multiplication 19456410c867Schristos daddu $3,$24 19466410c867Schristos daddu $1,$25 19476410c867Schristos sltu $24,$3,$24 19486410c867Schristos daddu $7,$1 19496410c867Schristos daddu $25,$24 19506410c867Schristos sltu $1,$7,$1 19516410c867Schristos daddu $7,$25 19526410c867Schristos daddu $2,$1 19536410c867Schristos sltu $25,$7,$25 19546410c867Schristos daddu $2,$25 1955e0ea3921Schristos mflo ($24,$9,$9) 1956e0ea3921Schristos mfhi ($25,$9,$9) 19576410c867Schristos daddu $3,$24 19586410c867Schristos sltu $1,$3,$24 1959e0ea3921Schristos dmultu ($8,$11) # mul_add_c2(a[4],b[7],c3,c1,c2); 19606410c867Schristos daddu $25,$1 19616410c867Schristos daddu $7,$25 19626410c867Schristos sltu $1,$7,$25 19636410c867Schristos daddu $2,$1 19646410c867Schristos sd $3,10*8($4) 1965e0ea3921Schristos mflo ($24,$8,$11) 1966e0ea3921Schristos mfhi ($25,$8,$11) 19676410c867Schristos daddu $7,$24 19686410c867Schristos sltu $1,$7,$24 1969e0ea3921Schristos dmultu ($9,$10) # forward multiplication 19706410c867Schristos daddu $7,$24 19716410c867Schristos daddu $1,$25 19726410c867Schristos sltu $24,$7,$24 19736410c867Schristos daddu $2,$1 19746410c867Schristos daddu $25,$24 19756410c867Schristos sltu $3,$2,$1 19766410c867Schristos daddu $2,$25 19776410c867Schristos sltu $25,$2,$25 19786410c867Schristos daddu $3,$25 1979e0ea3921Schristos mflo ($24,$9,$10) 1980e0ea3921Schristos mfhi ($25,$9,$10) 19816410c867Schristos daddu $7,$24 19826410c867Schristos sltu $1,$7,$24 
1983e0ea3921Schristos dmultu ($11,$9) # forward multiplication 19846410c867Schristos daddu $7,$24 19856410c867Schristos daddu $1,$25 19866410c867Schristos sltu $24,$7,$24 19876410c867Schristos daddu $2,$1 19886410c867Schristos daddu $25,$24 19896410c867Schristos sltu $1,$2,$1 19906410c867Schristos daddu $2,$25 19916410c867Schristos daddu $3,$1 19926410c867Schristos sltu $25,$2,$25 19936410c867Schristos daddu $3,$25 1994e0ea3921Schristos mflo ($24,$11,$9) 1995e0ea3921Schristos mfhi ($25,$11,$9) 19966410c867Schristos sd $7,11*8($4) 19976410c867Schristos daddu $2,$24 19986410c867Schristos sltu $1,$2,$24 1999e0ea3921Schristos dmultu ($10,$10) # forward multiplication 20006410c867Schristos daddu $2,$24 20016410c867Schristos daddu $1,$25 20026410c867Schristos sltu $24,$2,$24 20036410c867Schristos daddu $3,$1 20046410c867Schristos daddu $25,$24 20056410c867Schristos sltu $7,$3,$1 20066410c867Schristos daddu $3,$25 20076410c867Schristos sltu $25,$3,$25 20086410c867Schristos daddu $7,$25 2009e0ea3921Schristos mflo ($24,$10,$10) 2010e0ea3921Schristos mfhi ($25,$10,$10) 20116410c867Schristos daddu $2,$24 20126410c867Schristos sltu $1,$2,$24 2013e0ea3921Schristos dmultu ($10,$11) # mul_add_c2(a[6],b[7],c2,c3,c1); 20146410c867Schristos daddu $25,$1 20156410c867Schristos daddu $3,$25 20166410c867Schristos sltu $1,$3,$25 20176410c867Schristos daddu $7,$1 20186410c867Schristos sd $2,12*8($4) 2019e0ea3921Schristos mflo ($24,$10,$11) 2020e0ea3921Schristos mfhi ($25,$10,$11) 20216410c867Schristos daddu $3,$24 20226410c867Schristos sltu $1,$3,$24 2023e0ea3921Schristos dmultu ($11,$11) # forward multiplication 20246410c867Schristos daddu $3,$24 20256410c867Schristos daddu $1,$25 20266410c867Schristos sltu $24,$3,$24 20276410c867Schristos daddu $7,$1 20286410c867Schristos daddu $25,$24 20296410c867Schristos sltu $2,$7,$1 20306410c867Schristos daddu $7,$25 20316410c867Schristos sltu $25,$7,$25 20326410c867Schristos daddu $2,$25 2033e0ea3921Schristos mflo ($24,$11,$11) 
# Tail of bn_sqr_comba8 (the definition starts earlier in the file).
# The last product is picked up in $24 (low) / $25 (high) and folded into
# the two topmost result words.
	mfhi	($25,$11,$11)
	sd	$3,13*8($4)

	daddu	$7,$24			# low half into the current column
	sltu	$1,$7,$24		# $1 = carry out of that add
	daddu	$25,$1			# carry joins the high half
	daddu	$2,$25
	sd	$7,14*8($4)
	sd	$2,15*8($4)

	.set	noreorder
	jr	$31
	nop				# explicit delay slot under noreorder
.end	bn_sqr_comba8

# bn_sqr_comba4
#
# Squares the four 64-bit words read from $5 and writes the eight 64-bit
# words of the result through $4, producing one result column at a time
# (see the mul_add_c / mul_add_c2 annotations on the multiplies).
#
#   $5         address of the input words, loaded into $12..$15 as a[0..3]
#   $4         address of the output words, stored at offsets 0 .. 7*8
#   $2,$3,$7   three-word column accumulator; the roles rotate per column
#   $24,$25    low / high half of the most recently fetched product
#   $1,$6      carry scratch ($1 is usable because of .set noat at file top)
#
# Every cross product a[i]*a[j] with i != j is counted twice.  The first
# one (a[0]*a[1]) is doubled by shifting; the others are doubled by adding
# the low and high halves into the accumulator two times each.
#
# "forward multiplication" marks a multiply for the NEXT step that is
# issued while the product already copied to $24/$25 is still being added.
#
# NOTE(review): dmultu/mflo/mfhi are written with parenthesised operand
# lists, so they look like macros supplied by mips_arch.h; confirm there.
.align	5
.globl	bn_sqr_comba4
.ent	bn_sqr_comba4
bn_sqr_comba4:
	.set	reorder
	ld	$12,0($5)		# a[0]
	ld	$13,8($5)		# a[1]
	dmultu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
	ld	$14,2*8($5)		# a[2]
	ld	$15,3*8($5)		# a[3]
	mflo	($2,$12,$12)
	mfhi	($3,$12,$12)		# high half seeds the next column
	sd	$2,0($4)		# result word 0

	# Column 1: 2*a[0]*a[1], doubled by a one-bit left shift of $25:$24.
	dmultu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo	($24,$12,$13)
	mfhi	($25,$12,$13)
	slt	$2,$25,$0		# $2 = bit 63 of the high half (shifted out)
	dsll	$25,1
	dmultu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
	slt	$6,$24,$0		# $6 = bit 63 of the low half
	daddu	$25,$6			# ... moved into bit 0 of the high half
	dsll	$24,1
	daddu	$3,$24
	sltu	$1,$3,$24		# carry out of the low-half add
	daddu	$7,$25,$1		# $7 starts the next column: high half + carry
	sd	$3,8($4)		# result word 1
	sltu	$1,$7,$25		# carry out of high half + carry
	daddu	$2,$1
	mflo	($24,$14,$12)
	mfhi	($25,$14,$12)

	# Column 2: 2*a[2]*a[0] + a[1]*a[1], accumulated in $7 (low), $2, $3.
	daddu	$7,$24			# first copy of the low half
	sltu	$1,$7,$24
	dmultu	($13,$13)		# forward multiplication
	daddu	$7,$24			# second copy of the low half
	daddu	$1,$25			# first carry + high half
	sltu	$24,$7,$24		# $24 reused as the second carry
	daddu	$2,$1
	daddu	$25,$24			# second carry + high half
	sltu	$3,$2,$1		# sets (not adds to) the top accumulator word
	daddu	$2,$25
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$13,$13)
	mfhi	($25,$13,$13)
	daddu	$7,$24			# a[1]*a[1] is added once only
	sltu	$1,$7,$24
	dmultu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,2*8($4)		# result word 2
	mflo	($24,$12,$15)
	mfhi	($25,$12,$15)

	# Column 3: 2*a[0]*a[3] + 2*a[1]*a[2], accumulated in $2 (low), $3, $7.
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($13,$14)		# forward multiplication
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$7,$3,$1		# sets (not adds to) the top accumulator word
	daddu	$3,$25
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$13,$14)
	mfhi	($25,$13,$14)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$13)		# forward multiplication
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$1,$3,$1		# top word already live: carry is added below
	daddu	$3,$25
	daddu	$7,$1
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$15,$13)
	mfhi	($25,$15,$13)
	sd	$2,3*8($4)		# result word 3

	# Column 4: 2*a[3]*a[1] + a[2]*a[2], accumulated in $3 (low), $7, $2.
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$14)		# forward multiplication
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$2,$7,$1		# sets (not adds to) the top accumulator word
	daddu	$7,$25
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	($24,$14,$14)
	mfhi	($25,$14,$14)
	daddu	$3,$24			# a[2]*a[2] is added once only
	sltu	$1,$3,$24
	dmultu	($14,$15)		# mul_add_c2(a[2],b[3],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,4*8($4)		# result word 4
	mflo	($24,$14,$15)
	mfhi	($25,$14,$15)

	# Column 5: 2*a[2]*a[3], accumulated in $7 (low), $2, $3.
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($15,$15)		# forward multiplication
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$3,$2,$1		# sets (not adds to) the top accumulator word
	daddu	$2,$25
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$15,$15)
	mfhi	($25,$15,$15)
	sd	$7,5*8($4)		# result word 5

	# Columns 6 and 7: a[3]*a[3] is added once; no further carry is kept.
	daddu	$2,$24
	sltu	$1,$2,$24
	daddu	$25,$1
	daddu	$3,$25
	sd	$2,6*8($4)		# result word 6
	sd	$3,7*8($4)		# result word 7

	.set	noreorder
	jr	$31			# return to the caller
	nop				# explicit delay slot under noreorder
.end	bn_sqr_comba4