! GHASH (the GF(2^128) hash used by GCM) for SPARCv9, with VIS3 variants.
!
! Syntax: SPARC assembler, "!" starts a comment, one statement per line.
! This text is already preprocessed: stack frames are a literal 192 bytes
! and the pointer compare uses %xcc.
!
! Entry points defined below:
!   gcm_ghash_4bit, gcm_gmult_4bit  - table driven, one nibble per step
!   gcm_init_vis3, gcm_gmult_vis3, gcm_ghash_vis3
!                                   - carry-less multiply via xmulx/xmulxhi
!
! Every function opens a register window with "save" and leaves through
! "ret" + "restore", so incoming arguments are read from %i0..%i3.
! NOTE(review): the C prototypes are not visible in this file; the argument
! descriptions below are derived only from how each register is used.
!
! NOTE(review): the instruction interleaving looks hand scheduled (loads are
! issued well ahead of their uses). Do not reorder without measuring.

.register	%g2,#scratch
.register	%g3,#scratch
.section	".text",#alloc,#execinstr

! rem_4bit: 16 entries of 8 bytes, emitted as pairs of 32-bit words whose
! second word is always zero. The 4-bit routines index it with
! (low nibble of the accumulator) << 3 and xor the entry into the high half.
.align	64
rem_4bit:
	.long	0,0,471859200,0,943718400,0,610271232,0
	.long	1887436800,0,1822425088,0,1220542464,0,1423966208,0
	.long	3774873600,0,4246732800,0,3644850176,0,3311403008,0
	.long	2441084928,0,2376073216,0,2847932416,0,3051356160,0
.type	rem_4bit,#object
.size	rem_4bit,(.-rem_4bit)

!-----------------------------------------------------------------------
! gcm_ghash_4bit
! In:   %i0 = 16-byte accumulator Xi (read bytewise, written back as two
!             64-bit words at +0 and +8 after every block)
!       %i1 = table of 16-byte entries, indexed by nibble*16; the word
!             at +0 feeds the high half, the word at +8 the low half
!       %i2 = input pointer, advanced by 16 per block
!       %i3 = input length in bytes (converted to an end pointer)
! Out:  Xi updated in place; no return value is set up.
! Regs: %o0/%o1 = accumulator hi/lo     %o2/%o3 = table entry hi/lo
!       %o4 = rem_4bit entry            %o5 = bits carried from hi to lo
!       %l0 = high-nibble offset        %l1 = low-nibble offset
!       %l2/%l3 = Xi bytes 15/14 for the next block
!       %l4 = &rem_4bit                 %l5 = rem_4bit index
!       %l6 = %i1+8                     %l7 = byte counter
! Precondition: one full block is processed before the end pointer is
! tested, so a zero length is not handled here; the caller must pass a
! nonzero multiple of 16.
!-----------------------------------------------------------------------
.globl	gcm_ghash_4bit
.align	32
gcm_ghash_4bit:
	save	%sp,-192,%sp
	ldub	[%i2+15],%l1		! last input byte of first block
	ldub	[%i0+15],%l2		! Xi[15]
	ldub	[%i0+14],%l3		! Xi[14]
	add	%i3,%i2,%i3		! %i3 = end-of-input pointer
	add	%i1,8,%l6		! %l6 = table base for low halves

1:	call	.+8			! PC-relative: %o7 = address of 1b
	add	%o7,rem_4bit-1b,%l4	! (delay slot) %l4 = &rem_4bit

.Louter:
	xor	%l2,%l1,%l1		! byte 15 of Xi ^ input
	and	%l1,0xf0,%l0		! high nibble, already scaled by 16
	and	%l1,0x0f,%l1
	sll	%l1,4,%l1		! low nibble scaled by 16
	ldx	[%l6+%l1],%o1
	ldx	[%i1+%l1],%o0

	ldub	[%i2+14],%l1

	ldx	[%l6+%l0],%o3
	and	%o1,0xf,%l5
	ldx	[%i1+%l0],%o2
	sll	%l5,3,%l5
	ldx	[%l4+%l5],%o4
	srlx	%o1,4,%o1
	mov	13,%l7			! bytes 13..0 are handled by the loop
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1

	xor	%l3,%l1,%l1		! byte 14 of Xi ^ input
	and	%o1,0xf,%l5
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	ba	.Lghash_inner
	sll	%l1,4,%l1		! (delay slot)
.align	32
.Lghash_inner:
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i2+%l7],%l1		! next input byte
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	ldub	[%i0+%l7],%l3		! matching Xi byte
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%l3,%l1,%l1
	srlx	%o0,4,%o0
	and	%l1,0xf0,%l0
	addcc	%l7,-1,%l7		! carry is set while the old count was nonzero
	xor	%o1,%o5,%o1
	and	%l1,0x0f,%l1
	xor	%o3,%o1,%o1
	sll	%l1,4,%l1
	blu	.Lghash_inner		! loop on carry from the addcc above
	and	%o1,0xf,%l5		! (delay slot)

	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0

	add	%i2,16,%i2		! advance to the next input block
	cmp	%i2,%i3
	be,pn	%xcc,.Ldone		! reached the end pointer
	and	%o1,0xf,%l5		! (delay slot)

	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i2+15],%l1		! last byte of the next block
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]		! write back low half of Xi
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]		! write back high half of Xi
	srl	%o1,8,%l3
	and	%o1,0xff,%l2		! new Xi[15] taken from the register copy
	ba	.Louter
	and	%l3,0xff,%l3		! (delay slot) new Xi[14]
.align	32
.Ldone:
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]

	ret
	restore
.type	gcm_ghash_4bit,#function
.size	gcm_ghash_4bit,(.-gcm_ghash_4bit)

!-----------------------------------------------------------------------
! gcm_gmult_4bit
! In:   %i0 = 16-byte accumulator Xi (read bytewise from byte 15 down to
!             byte 0, result stored as 64-bit words at +0 and +8)
!       %i1 = table of 16-byte entries, same layout as in gcm_ghash_4bit
! Out:  Xi updated in place; no return value is set up.
! Same register roles and nibble stepping as gcm_ghash_4bit, except that
! no input data is xored in and exactly one block is processed.
!-----------------------------------------------------------------------
.globl	gcm_gmult_4bit
.align	32
gcm_gmult_4bit:
	save	%sp,-192,%sp
	ldub	[%i0+15],%l1		! Xi[15]
	add	%i1,8,%l6		! %l6 = table base for low halves

1:	call	.+8			! PC-relative: %o7 = address of 1b
	add	%o7,rem_4bit-1b,%l4	! (delay slot) %l4 = &rem_4bit

	and	%l1,0xf0,%l0		! high nibble, already scaled by 16
	and	%l1,0x0f,%l1
	sll	%l1,4,%l1		! low nibble scaled by 16
	ldx	[%l6+%l1],%o1
	ldx	[%i1+%l1],%o0

	ldub	[%i0+14],%l1		! Xi[14]

	ldx	[%l6+%l0],%o3
	and	%o1,0xf,%l5
	ldx	[%i1+%l0],%o2
	sll	%l5,3,%l5
	ldx	[%l4+%l5],%o4
	srlx	%o1,4,%o1
	mov	13,%l7			! bytes 13..0 are handled by the loop
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1

	and	%o1,0xf,%l5
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	ba	.Lgmult_inner
	sll	%l1,4,%l1		! (delay slot)
.align	32
.Lgmult_inner:
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i0+%l7],%l1		! next Xi byte
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	srlx	%o0,4,%o0
	and	%l1,0xf0,%l0
	addcc	%l7,-1,%l7		! carry is set while the old count was nonzero
	xor	%o1,%o5,%o1
	and	%l1,0x0f,%l1
	xor	%o3,%o1,%o1
	sll	%l1,4,%l1
	blu	.Lgmult_inner		! loop on carry from the addcc above
	and	%o1,0xf,%l5		! (delay slot)

	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]		! write back low half of Xi
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]		! write back high half of Xi

	ret
	restore
.type	gcm_gmult_4bit,#function
.size	gcm_gmult_4bit,(.-gcm_gmult_4bit)

!-----------------------------------------------------------------------
! gcm_init_vis3
! In:   %i0 = output area; 64-bit words are stored at +0, +8 and +16
!       %i1 = input H as two 64-bit words (+0 high, +8 low)
! Out:  [%i0+0],[%i0+8] = H shifted left by one bit as a 128-bit value;
!       when the bit shifted out was set, 0xE1<<57 is xored into the
!       high word and 1 into the low word ("twisted H").
!       [%i0+16] = the constant 0xA040608020C0E000.
!-----------------------------------------------------------------------
.globl	gcm_init_vis3
.align	32
gcm_init_vis3:
	save	%sp,-192,%sp

	ldx	[%i1+0],%o2
	ldx	[%i1+8],%o1
	mov	0xE1,%o4
	mov	1,%o3
	sllx	%o4,57,%o4
	srax	%o2,63,%g1		! broadcast carry
	addcc	%o1,%o1,%o1		! H<<=1
	.word	0x95b2822a !addxc	%o2,%o2,%o2
	and	%g1,%o3,%o3
	and	%g1,%o4,%o4
	xor	%o3,%o1,%o1
	xor	%o4,%o2,%o2
	stx	%o1,[%i0+8]		! save twisted H
	stx	%o2,[%i0+0]

	sethi	%hi(0xA0406080),%g5
	sethi	%hi(0x20C0E000),%l0
	or	%g5,%lo(0xA0406080),%g5
	or	%l0,%lo(0x20C0E000),%l0
	sllx	%g5,32,%g5
	or	%l0,%g5,%g5		! (0xE0·i)&0xff=0xA040608020C0E000
	stx	%g5,[%i0+16]

	ret
	restore
.type	gcm_init_vis3,#function
.size	gcm_init_vis3,.-gcm_init_vis3

!-----------------------------------------------------------------------
! gcm_gmult_vis3
! In:   %i0 = Xi as two 64-bit words (+0 high, +8 low), updated in place
!       %i1 = three 64-bit words: twisted H at +0/+8 and a constant at
!             +16 (presumably the area written by gcm_init_vis3; verify
!             against the caller)
! Out:  Xi replaced by the reduced carry-less product; no return value.
! Regs: %o1/%o2 = H lo/hi, %o0 = H hi ^ H lo, %o3/%o4 = Xi lo/hi,
!       %o5 = 0xE1<<57, %g1..%g4 = partial products, %g5 = word from +16,
!       %o7 = scratch.
!-----------------------------------------------------------------------
.globl	gcm_gmult_vis3
.align	32
gcm_gmult_vis3:
	save	%sp,-192,%sp

	ldx	[%i0+8],%o3		! load Xi
	ldx	[%i0+0],%o4
	ldx	[%i1+8],%o1		! load twisted H
	ldx	[%i1+0],%o2

	mov	0xE1,%l7
	sllx	%l7,57,%o5		! 57 is not a typo
	ldx	[%i1+16],%g5		! (0xE0·i)&0xff=0xA040608020C0E000

	xor	%o2,%o1,%o0		! Karatsuba pre-processing
	.word	0x83b2e2a9 !xmulx	%o3,%o1,%g1
	xor	%o3,%o4,%g3		! Karatsuba pre-processing
	.word	0x85b0e2a8 !xmulx	%g3,%o0,%g2
	.word	0x97b2e2c9 !xmulxhi	%o3,%o1,%o3
	.word	0x87b0e2c8 !xmulxhi	%g3,%o0,%g3
	.word	0x89b322ca !xmulxhi	%o4,%o2,%g4
	.word	0x99b322aa !xmulx	%o4,%o2,%o4

	sll	%g1,3,%o7
	srlx	%g5,%o7,%o7		! ·0xE0 [implicit &(7<<3)]
	xor	%g1,%o7,%o7
	sllx	%o7,57,%o7		! (%g1·0xE1)<<1<<56 [implicit &0x7f]

	xor	%g1,%g2,%g2		! Karatsuba post-processing
	xor	%o3,%g3,%g3
	xor	%o7,%o3,%o3		! real destination is %g2
	xor	%g4,%g3,%g3
	xor	%o3,%g2,%g2
	xor	%o4,%g3,%g3
	xor	%o4,%g2,%g2

	.word	0x97b062cd !xmulxhi	%g1,%o5,%o3		! ·0xE1<<1<<56
	xor	%g1,%g3,%g3
	.word	0x83b0a2ad !xmulx	%g2,%o5,%g1
	xor	%g2,%g4,%g4
	.word	0x85b0a2cd !xmulxhi	%g2,%o5,%g2

	xor	%o3,%g3,%g3
	xor	%g1,%g3,%g3
	xor	%g2,%g4,%g4

	stx	%g3,[%i0+8]		! save Xi
	stx	%g4,[%i0+0]

	ret
	restore
.type	gcm_gmult_vis3,#function
.size	gcm_gmult_vis3,.-gcm_gmult_vis3

!-----------------------------------------------------------------------
! gcm_ghash_vis3
! In:   %i0 = Xi as two 64-bit words (+0 high, +8 low), updated on exit
!       %i1 = three 64-bit words, same layout as for gcm_gmult_vis3
!       %i2 = input pointer, any byte alignment
!       %i3 = input length in bytes
! Out:  Xi updated in place; no return value is set up.
! Regs: %g3/%g4 = running Xi lo/hi between blocks,
!       %l0 = input misalignment in bits, %l1 = negated %l0 used as the
!       complementary shift count; other roles as in gcm_gmult_vis3.
! The input pointer is rounded down to an 8-byte boundary; when it was
! misaligned, a third 64-bit word is loaded and the block is assembled
! with shifts.
! Precondition: the length is decremented by 16 and tested for zero only
! at the bottom of the loop, so the caller must pass a nonzero multiple
! of 16.
!-----------------------------------------------------------------------
.globl	gcm_ghash_vis3
.align	32
gcm_ghash_vis3:
	save	%sp,-192,%sp
	nop
	srln	%i3,0,%i3		! needed on v8+, "nop" on v9

	ldx	[%i0+8],%g3		! load Xi
	ldx	[%i0+0],%g4
	ldx	[%i1+8],%o1		! load twisted H
	ldx	[%i1+0],%o2

	mov	0xE1,%l7
	sllx	%l7,57,%o5		! 57 is not a typo
	ldx	[%i1+16],%g5		! (0xE0·i)&0xff=0xA040608020C0E000

	and	%i2,7,%l0		! misalignment in bytes
	andn	%i2,7,%i2		! round input pointer down to 8 bytes
	sll	%l0,3,%l0		! misalignment in bits
	prefetch [%i2+63], 20
	sub	%g0,%l0,%l1		! complementary shift count

	xor	%o2,%o1,%o0		! Karatsuba pre-processing
.Loop:
	ldx	[%i2+8],%o3
	brz,pt	%l0,1f			! aligned input: skip the merge
	ldx	[%i2+0],%o4		! (delay slot)

	ldx	[%i2+16],%g2		! align data
	srlx	%o3,%l1,%g1
	sllx	%o3,%l0,%o3
	sllx	%o4,%l0,%o4
	srlx	%g2,%l1,%g2
	or	%g1,%o4,%o4
	or	%g2,%o3,%o3
1:
	add	%i2,16,%i2
	sub	%i3,16,%i3
	xor	%g3,%o3,%o3		! Xi ^= input block
	xor	%g4,%o4,%o4
	prefetch [%i2+63], 20

	.word	0x83b2e2a9 !xmulx	%o3,%o1,%g1
	xor	%o3,%o4,%g3		! Karatsuba pre-processing
	.word	0x85b0e2a8 !xmulx	%g3,%o0,%g2
	.word	0x97b2e2c9 !xmulxhi	%o3,%o1,%o3
	.word	0x87b0e2c8 !xmulxhi	%g3,%o0,%g3
	.word	0x89b322ca !xmulxhi	%o4,%o2,%g4
	.word	0x99b322aa !xmulx	%o4,%o2,%o4

	sll	%g1,3,%o7
	srlx	%g5,%o7,%o7		! ·0xE0 [implicit &(7<<3)]
	xor	%g1,%o7,%o7
	sllx	%o7,57,%o7		! (%g1·0xE1)<<1<<56 [implicit &0x7f]

	xor	%g1,%g2,%g2		! Karatsuba post-processing
	xor	%o3,%g3,%g3
	xor	%o7,%o3,%o3		! real destination is %g2
	xor	%g4,%g3,%g3
	xor	%o3,%g2,%g2
	xor	%o4,%g3,%g3
	xor	%o4,%g2,%g2

	.word	0x97b062cd !xmulxhi	%g1,%o5,%o3		! ·0xE1<<1<<56
	xor	%g1,%g3,%g3
	.word	0x83b0a2ad !xmulx	%g2,%o5,%g1
	xor	%g2,%g4,%g4
	.word	0x85b0a2cd !xmulxhi	%g2,%o5,%g2

	xor	%o3,%g3,%g3
	xor	%g1,%g3,%g3
	brnz,pt	%i3,.Loop		! more input remaining
	xor	%g2,%g4,%g4		! (delay slot)

	stx	%g3,[%i0+8]		! save Xi
	stx	%g4,[%i0+0]

	ret
	restore
.type	gcm_ghash_vis3,#function
.size	gcm_ghash_vis3,.-gcm_ghash_vis3
.asciz	"GHASH for SPARCv9/VIS3, CRYPTOGAMS by <appro@openssl.org>"
.align	4