#include "mips_arch.h"

/*
 * Word-by-word Montgomery multiplication for 32-bit MIPS (see the ident
 * string at the bottom of the file).
 *
 * Syntax / ABI assumptions visible in this file:
 *   - GNU as, numeric register names, run through the C preprocessor.
 *   - Arguments 1-4 arrive in $4..$7, arguments 5 and 6 are read from
 *     16($29) and 20($29); words are 4 bytes.
 *   - multu / mflo / mfhi are written in a parenthesised form; these are
 *     presumably function-like macros supplied by mips_arch.h
 *     (NOTE(review): confirm against that header).
 *   - The C prototype is not visible here.  The names rp, ap, bp, np, tp
 *     and num are taken from the comments already present in the code;
 *     "n0" below is an assumed name for the word fetched through the
 *     fifth argument (TODO confirm against the caller).
 */

.text

.set    noat                            # $1 is used explicitly as scratch
.set    noreorder                       # delay slots are filled by hand

/*
 * bn_mul_mont -- public entry point; only checks the size and dispatches.
 *
 * In:   $4..$7    passed through untouched to bn_mul_mont_internal
 *       16($29)   fifth argument (pointer, dereferenced by the worker)
 *       20($29)   sixth argument, the word count num
 * Out:  $2 = 0 and nothing computed when num < 4 or num >= 17
 *       (signed compares); otherwise control continues in
 *       bn_mul_mont_internal, which returns with $2 = 1.
 */
.align  5
.globl  bn_mul_mont
.ent    bn_mul_mont
bn_mul_mont:
        lw      $8,16($29)              # $8 = fifth argument (pointer)
        lw      $9,20($29)              # $9 = num
        slt     $1,$9,4
        bnez    $1,1f                   # num < 4: refuse
        li      $2,0                    # delay slot: return value 0, always executed
        slt     $1,$9,17        # on in-order CPU
        bnez    $1,bn_mul_mont_internal # 4 <= num < 17: do the work
        nop                             # delay slot
1:      jr      $31                     # refuse path: return 0
        li      $4,0                    # delay slot: clear $4 as well
.end    bn_mul_mont

/*
 * bn_mul_mont_internal -- the actual multiplication.
 *
 * In:   $4 = rp (result, num words written)
 *       $5 = ap, $6 = bp, $7 = np (each read as num words)
 *       $8 = pointer to one word ("n0", assumed name)
 *       $9 = num in words (converted to a byte count below)
 * Out:  $2 = 1 (and $4 = 1)
 * Saved/restored: $16..$23 and $30; $30 serves as frame pointer.
 * Stack: 14*4 byte frame plus a scratch vector tp of num+2 words placed
 *        below it and aligned down to a 4096-byte boundary.  tp is
 *        overwritten with zeros before returning.
 *
 * Working registers:
 *   $13 current bp word            $23 per-row multiplier m
 *   $12 current ap word            $14 current np word
 *   $10/$11 lo/hi of the ap*bp accumulation
 *   $24/$25 lo/hi of the np*m  accumulation
 *   $16/$17 lo/hi of the pending ap product
 *   $18/$19 lo/hi of the pending np product
 *   $15 tp write pointer           $20 tp word being added / end of tp
 *   $22 inner byte offset j        $21 outer byte offset i
 *   $1, $2 carry scratch
 */
.align  5
.ent    bn_mul_mont_internal
bn_mul_mont_internal:
        .frame  $30,14*4,$31
        .mask   0x40000000|16711680,-4  # $30 plus $16..$23 (16711680 = 0x00ff0000)
        subu    $29,14*4
        sw      $30,(14-1)*4($29)
        sw      $23,(14-2)*4($29)
        sw      $22,(14-3)*4($29)
        sw      $21,(14-4)*4($29)
        sw      $20,(14-5)*4($29)
        sw      $19,(14-6)*4($29)
        sw      $18,(14-7)*4($29)
        sw      $17,(14-8)*4($29)
        sw      $16,(14-9)*4($29)
        move    $30,$29                 # frame pointer; $29 is re-aligned below

        .set    reorder                 # let the assembler schedule this part
        lw      $8,0($8)                # $8 = word behind the fifth argument
        lw      $13,0($6)       # bp[0]
        lw      $12,0($5)       # ap[0]
        lw      $14,0($7)       # np[0]

        subu    $29,2*4         # place for two extra words
        sll     $9,2                    # num is a byte count from here on
        li      $1,-4096
        subu    $29,$9                  # room for tp[0..num-1]
        and     $29,$1                  # align tp down to a 4096-byte boundary

        # ---- first row: tp = ap*bp[0] + np*m, low word dropped ----
        multu   ($12,$13)
        lw      $17,4($5)               # ap[1]
        lw      $19,4($7)               # np[1]
        mflo    ($10,$12,$13)
        mfhi    ($11,$12,$13)
        multu   ($10,$8)
        mflo    ($23,$10,$8)            # m = low word of lo(ap[0]*bp[0]) * $8

        multu   ($17,$13)
        mflo    ($16,$17,$13)
        mfhi    ($17,$17,$13)           # $17 now holds hi(ap[1]*bp[0])

        multu   ($14,$23)
        mflo    ($24,$14,$23)
        mfhi    ($25,$14,$23)
        multu   ($19,$23)
        addu    $24,$10                 # low word sum is never stored, only its carry is kept
        sltu    $1,$24,$10
        addu    $25,$1
        mflo    ($18,$19,$23)
        mfhi    ($19,$19,$23)           # $19 now holds hi(np[1]*m)

        move    $15,$29                 # tp write pointer = &tp[0]
        li      $22,2*4                 # j = 2 (as byte offset)
.align  4
.L1st:
        .set    noreorder
        addu    $12,$5,$22
        addu    $14,$7,$22
        lw      $12,($12)               # ap[j]
        lw      $14,($14)               # np[j]

        multu   ($12,$13)
        addu    $10,$16,$11             # pending ap product + previous high word
        addu    $24,$18,$25             # pending np product + previous high word
        sltu    $1,$10,$11
        sltu    $2,$24,$25
        addu    $11,$17,$1
        addu    $25,$19,$2
        mflo    ($16,$12,$13)
        mfhi    ($17,$12,$13)

        addu    $24,$10
        sltu    $1,$24,$10
        multu   ($14,$23)
        addu    $25,$1
        addu    $22,4                   # j++
        sw      $24,($15)               # tp[j-2] (j before the increment)
        sltu    $2,$22,$9
        mflo    ($18,$14,$23)
        mfhi    ($19,$14,$23)

        bnez    $2,.L1st                # while j < num
        addu    $15,4                   # delay slot: advance tp pointer
        .set    reorder

        # drain the pending products of the first row
        addu    $10,$16,$11
        sltu    $1,$10,$11
        addu    $11,$17,$1

        addu    $24,$18,$25
        sltu    $2,$24,$25
        addu    $25,$19,$2
        addu    $24,$10
        sltu    $1,$24,$10
        addu    $25,$1

        sw      $24,($15)               # tp[num-2]

        addu    $25,$11
        sltu    $1,$25,$11
        sw      $25,4($15)              # tp[num-1]
        sw      $1,2*4($15)             # tp[num] = top carry

        li      $21,4                   # i = 1 (as byte offset)
.align  4
.Louter:
        # ---- row i: tp = (tp + ap*bp[i] + np*m) with the low word dropped ----
        addu    $13,$6,$21
        lw      $13,($13)               # bp[i]
        lw      $12,($5)                # ap[0]
        lw      $17,4($5)               # ap[1]
        lw      $20,($29)               # tp[0]

        multu   ($12,$13)
        lw      $14,($7)                # np[0]
        lw      $19,4($7)               # np[1]
        mflo    ($10,$12,$13)
        mfhi    ($11,$12,$13)
        addu    $10,$20                 # + tp[0]
        multu   ($10,$8)
        sltu    $1,$10,$20
        addu    $11,$1
        mflo    ($23,$10,$8)            # m for this row

        multu   ($17,$13)
        mflo    ($16,$17,$13)
        mfhi    ($17,$17,$13)

        multu   ($14,$23)
        mflo    ($24,$14,$23)
        mfhi    ($25,$14,$23)

        multu   ($19,$23)
        addu    $24,$10                 # low word sum is never stored, only its carry is kept
        sltu    $1,$24,$10
        addu    $25,$1
        mflo    ($18,$19,$23)
        mfhi    ($19,$19,$23)

        move    $15,$29                 # tp write pointer = &tp[0]
        li      $22,2*4                 # j = 2 (as byte offset)
        lw      $20,4($15)              # tp[1]
.align  4
.Linner:
        .set    noreorder
        addu    $12,$5,$22
        addu    $14,$7,$22
        lw      $12,($12)               # ap[j]
        lw      $14,($14)               # np[j]

        multu   ($12,$13)
        addu    $10,$16,$11
        addu    $24,$18,$25
        sltu    $1,$10,$11
        sltu    $2,$24,$25
        addu    $11,$17,$1
        addu    $25,$19,$2
        mflo    ($16,$12,$13)
        mfhi    ($17,$12,$13)

        addu    $10,$20                 # add the old tp word
        addu    $22,4                   # j++
        multu   ($14,$23)
        sltu    $1,$10,$20
        addu    $24,$10
        addu    $11,$1
        sltu    $2,$24,$10
        lw      $20,2*4($15)            # fetch the next old tp word before it is overwritten
        addu    $25,$2
        sltu    $1,$22,$9
        mflo    ($18,$14,$23)
        mfhi    ($19,$14,$23)
        sw      $24,($15)               # new tp word, two slots behind j
        bnez    $1,.Linner              # while j < num
        addu    $15,4                   # delay slot: advance tp pointer
        .set    reorder

        # drain the pending products of row i
        addu    $10,$16,$11
        sltu    $1,$10,$11
        addu    $11,$17,$1
        addu    $10,$20
        sltu    $2,$10,$20
        addu    $11,$2

        lw      $20,2*4($15)            # old tp[num]
        addu    $24,$18,$25
        sltu    $1,$24,$25
        addu    $25,$19,$1
        addu    $24,$10
        sltu    $2,$24,$10
        addu    $25,$2
        sw      $24,($15)               # tp[num-2]

        addu    $24,$25,$11
        sltu    $25,$24,$11
        addu    $24,$20
        sltu    $1,$24,$20
        addu    $25,$1
        sw      $24,4($15)              # tp[num-1]
        sw      $25,2*4($15)            # tp[num] = top carry, also left in $25

        addu    $21,4                   # i++
        sltu    $2,$21,$9
        bnez    $2,.Louter              # while i < num

        # ---- final step: rp = tp - np, then pick tp or the difference ----
        .set    noreorder
        addu    $20,$29,$9      # &tp[num]
        move    $15,$29                 # read pointer = &tp[0]
        move    $5,$29                  # $5 is not read again in this routine
        li      $11,0           # clear borrow bit

.align  4
.Lsub:
        lw      $10,($15)
        lw      $24,($7)
        addu    $15,4
        addu    $7,4
        subu    $24,$10,$24     # tp[i]-np[i]
        sgtu    $1,$24,$10              # borrow out of the subtraction
        subu    $10,$24,$11             # minus incoming borrow
        sgtu    $11,$10,$24             # borrow out of that
        sw      $10,($4)                # rp[i] = difference word
        or      $11,$1                  # combined borrow for the next word
        sltu    $1,$15,$20
        bnez    $1,.Lsub                # until &tp[num] is reached
        addu    $4,4                    # delay slot: advance rp

        subu    $11,$25,$11     # handle upmost overflow bit
        move    $15,$29                 # rewind to &tp[0]
        subu    $4,$9           # restore rp
        not     $25,$11                 # $11 selects tp words, $25 selects rp words

.Lcopy:
        lw      $14,($15)       # conditional move
        lw      $12,($4)
        sw      $0,($15)                # wipe the scratch word
        addu    $15,4
        and     $14,$11
        and     $12,$25
        or      $12,$14                 # mask select, no branch on the borrow
        sltu    $1,$15,$20
        sw      $12,($4)
        bnez    $1,.Lcopy               # until &tp[num] is reached
        addu    $4,4                    # delay slot: advance rp

        li      $4,1
        li      $2,1                    # return value 1

        .set    noreorder
        move    $29,$30                 # drop tp, back to the fixed frame
        lw      $30,(14-1)*4($29)
        lw      $23,(14-2)*4($29)
        lw      $22,(14-3)*4($29)
        lw      $21,(14-4)*4($29)
        lw      $20,(14-5)*4($29)
        lw      $19,(14-6)*4($29)
        lw      $18,(14-7)*4($29)
        lw      $17,(14-8)*4($29)
        lw      $16,(14-9)*4($29)
        jr      $31
        addu    $29,14*4                # delay slot: release the frame
.end    bn_mul_mont_internal
.rdata
.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"