# Montgomery multiplication for MIPS using 64-bit words (ld/sd/dmultu).
#
# Entry point: bn_mul_mont
#   $4   rp   result buffer, num words
#   $5   ap   first multiplicand, num words
#   $6   bp   second multiplicand, num words
#   $7   np   modulus, num words
#   $8   pointer to one 64-bit word; that word is multiplied with the low
#        word of every pass to obtain the reduction multiplier
#        (presumably -np[0]^-1 mod 2^64 -- confirm against the caller)
#   $9   num  operand length in 64-bit words (compared as a signed value)
# Result in $2: 1 when rp has been written, 0 when num is below 4 or
# above 16 and nothing was done.  $4 is overwritten with the same value.
# NOTE(review): six register arguments in $4..$9 look like the n64/n32
# calling convention -- confirm against the build configuration.
#
# Register roles inside bn_mul_mont_internal:
#   $10/$11  low/high word of the running ap[j]*b sum ($11 is the carry
#            word fed into the next position)
#   $24/$25  low/high word of the running np[j]*m sum
#   $12,$14  ap[j] / np[j] scratch (also scratch in the final copy)
#   $13      current word of bp
#   $15      cursor into the temporary vector tp
#   $16/$17  pending product ap[j]*b (low/high)
#   $18/$19  pending product np[j]*m (low/high)
#   $20      tp word being accumulated; later &tp[num]
#   $21      outer byte offset into bp
#   $22      inner byte offset into ap/np
#   $23      m, the reduction multiplier of the current pass
#   $1,$2    carry / compare scratch
#
# tp lives on the stack below the register save area, is aligned down to
# a 4096-byte boundary, and is zeroed again before returning.

.text

.set	noat
.set	noreorder

.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
	slt	$1,$9,4			# num < 4 ?
	bnez	$1,1f			#   yes: refuse
	li	$2,0			# (delay slot) return value 0
	slt	$1,$9,17	# on in-order CPU
	bnez	$1,bn_mul_mont_internal	# 4 <= num < 17: do the work
	nop				# (delay slot)
1:	jr	$31
	li	$4,0			# (delay slot)
.end	bn_mul_mont

.align	5
.ent	bn_mul_mont_internal
bn_mul_mont_internal:
	.frame	$30,14*8,$31
	.mask	0x40000000|16711680,-8
	dsub	$29,14*8		# fixed frame: 14 doublewords
	sd	$30,(14-1)*8($29)
	sd	$23,(14-2)*8($29)
	sd	$22,(14-3)*8($29)
	sd	$21,(14-4)*8($29)
	sd	$20,(14-5)*8($29)
	sd	$19,(14-6)*8($29)
	sd	$18,(14-7)*8($29)
	sd	$17,(14-8)*8($29)
	sd	$16,(14-9)*8($29)
	move	$30,$29			# $30 remembers the frame base for the epilogue

	.set	reorder
	ld	$8,0($8)		# fetch the reduction constant itself
	ld	$13,0($6)	# bp[0]
	ld	$12,0($5)	# ap[0]
	ld	$14,0($7)	# np[0]

	dsub	$29,2*8	# place for two extra words
	sll	$9,3			# num is a byte count from here on
	li	$1,-4096
	dsub	$29,$9			# room for tp[0..num-1]
	and	$29,$1			# align tp down to 4096 bytes

	# First pass (bp[0]): tp = (ap*bp[0] + np*m) >> 64
	dmultu	$12,$13			# ap[0]*bp[0]
	ld	$16,8($5)		# ap[1]
	ld	$18,8($7)		# np[1]
	mflo	$10
	mfhi	$11
	dmultu	$10,$8			# m = low word * reduction constant
	mflo	$23

	dmultu	$16,$13			# ap[1]*bp[0]
	mflo	$16
	mfhi	$17

	dmultu	$14,$23			# np[0]*m
	mflo	$24
	mfhi	$25
	dmultu	$18,$23			# np[1]*m
	daddu	$24,$10			# low words are summed only for the carry;
	sltu	$1,$24,$10		# the sum itself is never stored
	daddu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29			# tp cursor = &tp[0]
	li	$22,2*8			# j = 2 (byte offset)
.align	4
.L1st:
	.set	noreorder
	dadd	$12,$5,$22
	dadd	$14,$7,$22
	ld	$12,($12)		# ap[j]
	ld	$14,($14)		# np[j]

	dmultu	$12,$13			# start ap[j]*bp[0]
	daddu	$10,$16,$11		# previous a-product + carry word
	daddu	$24,$18,$25		# previous n-product + carry word
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	daddu	$11,$17,$1		# new a carry word
	daddu	$25,$19,$2		# new n carry word
	mflo	$16
	mfhi	$17

	daddu	$24,$10			# combine both sums
	sltu	$1,$24,$10
	dmultu	$14,$23			# start np[j]*m
	daddu	$25,$1
	addu	$22,8			# j++
	sd	$24,($15)		# store word j-1 of the sum at tp[j-2]
	sltu	$2,$22,$9		# j < num ?
	mflo	$18
	mfhi	$19

	bnez	$2,.L1st
	dadd	$15,8			# (delay slot) advance tp cursor
	.set	reorder

	# Drain the two pending products and write the top three words.
	daddu	$10,$16,$11
	sltu	$1,$10,$11
	daddu	$11,$17,$1

	daddu	$24,$18,$25
	sltu	$2,$24,$25
	daddu	$25,$19,$2
	daddu	$24,$10
	sltu	$1,$24,$10
	daddu	$25,$1

	sd	$24,($15)		# tp[num-2]

	daddu	$25,$11
	sltu	$1,$25,$11
	sd	$25,8($15)		# tp[num-1]
	sd	$1,2*8($15)		# tp[num] = carry out

	li	$21,8			# i = 1 (byte offset)
.align	4
.Louter:
	# Pass i: tp = (tp + ap*bp[i] + np*m) >> 64
	dadd	$13,$6,$21
	ld	$13,($13)		# bp[i]
	ld	$12,($5)		# ap[0]
	ld	$16,8($5)		# ap[1]
	ld	$20,($29)		# tp[0]

	dmultu	$12,$13			# ap[0]*bp[i]
	ld	$14,($7)		# np[0]
	ld	$18,8($7)		# np[1]
	mflo	$10
	mfhi	$11
	daddu	$10,$20			# + tp[0]
	dmultu	$10,$8			# m for this pass
	sltu	$1,$10,$20
	daddu	$11,$1
	mflo	$23

	dmultu	$16,$13			# ap[1]*bp[i]
	mflo	$16
	mfhi	$17

	dmultu	$14,$23			# np[0]*m
	mflo	$24
	mfhi	$25

	dmultu	$18,$23			# np[1]*m
	daddu	$24,$10			# only the carry of the low word is kept
	sltu	$1,$24,$10
	daddu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29			# tp cursor = &tp[0]
	li	$22,2*8			# j = 2 (byte offset)
	ld	$20,8($15)		# tp[1]
.align	4
.Linner:
	.set	noreorder
	dadd	$12,$5,$22
	dadd	$14,$7,$22
	ld	$12,($12)		# ap[j]
	ld	$14,($14)		# np[j]

	dmultu	$12,$13			# start ap[j]*bp[i]
	daddu	$10,$16,$11
	daddu	$24,$18,$25
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	daddu	$11,$17,$1
	daddu	$25,$19,$2
	mflo	$16
	mfhi	$17

	daddu	$10,$20			# + tp[j-1]
	addu	$22,8			# j++
	dmultu	$14,$23			# start np[j]*m
	sltu	$1,$10,$20
	daddu	$24,$10
	daddu	$11,$1
	sltu	$2,$24,$10
	ld	$20,2*8($15)		# next tp word, read before it is overwritten
	daddu	$25,$2
	sltu	$1,$22,$9		# j < num ?
	mflo	$18
	mfhi	$19
	sd	$24,($15)		# store word j-1 of the sum at tp[j-2]
	bnez	$1,.Linner
	dadd	$15,8			# (delay slot) advance tp cursor
	.set	reorder

	# Drain the pending products and fold in the old top word.
	daddu	$10,$16,$11
	sltu	$1,$10,$11
	daddu	$11,$17,$1
	daddu	$10,$20			# + tp[num-1]
	sltu	$2,$10,$20
	daddu	$11,$2

	ld	$20,2*8($15)		# old tp[num]
	daddu	$24,$18,$25
	sltu	$1,$24,$25
	daddu	$25,$19,$1
	daddu	$24,$10
	sltu	$2,$24,$10
	daddu	$25,$2
	sd	$24,($15)		# tp[num-2]

	daddu	$24,$25,$11
	sltu	$25,$24,$11
	daddu	$24,$20
	sltu	$1,$24,$20
	daddu	$25,$1
	sd	$24,8($15)		# tp[num-1]
	sd	$25,2*8($15)		# tp[num]; $25 keeps this top word

	addu	$21,8			# i++
	sltu	$2,$21,$9		# i < num ?
	bnez	$2,.Louter

	.set	noreorder
	dadd	$20,$29,$9	# &tp[num]
	move	$15,$29
	li	$11,0		# clear borrow bit

	# rp = tp - np, word by word, tracking the borrow in $11.
.align	4
.Lsub:	ld	$10,($15)
	ld	$24,($7)
	dadd	$15,8
	dadd	$7,8
	dsubu	$24,$10,$24	# tp[i]-np[i]
	sgtu	$1,$24,$10		# borrow out of tp[i]-np[i]
	dsubu	$10,$24,$11		# minus incoming borrow
	sgtu	$11,$10,$24		# borrow out of that step
	sd	$10,($4)
	or	$11,$1			# combined outgoing borrow
	sltu	$1,$15,$20
	bnez	$1,.Lsub
	dadd	$4,8			# (delay slot)

	dsubu	$11,$25,$11	# handle upmost overflow bit
	move	$15,$29
	dsub	$4,$9		# restore rp
	not	$25,$11

	# $11 is all ones when the subtraction borrowed out of a zero top
	# word (keep tp) and zero when top word and borrow are equal (keep
	# the difference already in rp); $25 is its complement.
	# Both tp[i] and rp[i] are always read and blended with the masks, so
	# the addresses touched do not depend on which value is kept.  The
	# word stored to rp[i] is the same one a copy from the selected
	# buffer would have produced.  tp is wiped on the way.
.align	4
.Lcopy:	ld	$14,($15)		# tp[i]
	ld	$12,($4)		# rp[i] = tp[i]-np[i]
	sd	$0,($15)		# zero tp[i]
	dadd	$15,8
	and	$14,$11
	and	$12,$25
	or	$12,$14			# rp[i] = borrow ? tp[i] : rp[i]
	sltu	$1,$15,$20
	sd	$12,($4)
	bnez	$1,.Lcopy
	dadd	$4,8			# (delay slot)

	li	$4,1
	li	$2,1			# return 1

	.set	noreorder
	move	$29,$30			# drop tp, back to the fixed frame
	ld	$30,(14-1)*8($29)
	ld	$23,(14-2)*8($29)
	ld	$22,(14-3)*8($29)
	ld	$21,(14-4)*8($29)
	ld	$20,(14-5)*8($29)
	ld	$19,(14-6)*8($29)
	ld	$18,(14-7)*8($29)
	ld	$17,(14-8)*8($29)
	ld	$16,(14-9)*8($29)
	jr	$31
	dadd	$29,14*8		# (delay slot) release the frame
.end	bn_mul_mont_internal
.rdata
.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"