! GHASH for SPARC V9.
!
! Two implementations live in this file:
!   gcm_gmult_4bit, gcm_ghash_4bit
!       table driven, consumes 4 bits per step using rem_4bit below.
!   gcm_init_vis3, gcm_gmult_vis3, gcm_ghash_vis3
!       carry-less multiplication; the xmulx/xmulxhi/addxc instructions
!       are emitted as .word so older assemblers accept the file.
!
! Every function opens a register window with a 112-byte, unbiased frame
! and gcm_ghash_4bit compares pointers with %icc, i.e. this appears to be
! the 32-bit (v8+) ABI flavour of the module.
!
! NOTE(review): the C prototypes are not visible here; the argument
! descriptions below are derived from how each %i register is used.

.section	".text",#alloc,#execinstr

! rem_4bit: 16 entries of 8 bytes, each written as a .long pair whose
! second word is 0.  It is indexed with (Z.lo & 0xf) * 8 and the entry is
! XORed into Z.hi after Z has been shifted right by 4 bits.  It sits in
! .text so the code can locate it PC-relatively.
.align	64
rem_4bit:
	.long	0,0,471859200,0,943718400,0,610271232,0
	.long	1887436800,0,1822425088,0,1220542464,0,1423966208,0
	.long	3774873600,0,4246732800,0,3644850176,0,3311403008,0
	.long	2441084928,0,2376073216,0,2847932416,0,3051356160,0
.type	rem_4bit,#object
.size	rem_4bit,(.-rem_4bit)

! gcm_ghash_4bit
!   %i0  Xi: 16 bytes, read byte-wise and rewritten with two stx per block
!   %i1  table: 16-byte entries, upper 64 bits at +0, lower 64 bits at +8
!   %i2  input pointer
!   %i3  byte count; turned into an end pointer.  The end test is an
!        equality compare made after each 16-byte block, so the count
!        must be a non-zero multiple of 16.
!
! Register roles:
!   %o0:%o1  Z (hi:lo), the value being accumulated
!   %o2:%o3  T (hi:lo), table entry being folded in
!   %o4      rem_4bit entry     %o5  low 4 bits of Z.hi moved to top of Z.lo
!   %l0      (byte & 0xf0), i.e. high nibble as a table offset
!   %l1      (byte & 0x0f) << 4, i.e. low nibble as a table offset
!   %l2/%l3  Xi[15] / Xi byte for the next step
!   %l4      address of rem_4bit
!   %l5      (Z.lo & 0xf) << 3, offset into rem_4bit
!   %l6      %i1 + 8 (lower halves of the table entries)
!   %l7      byte index, runs from 13 down to 0
!
! The code is software pipelined: loads for the next nibble are issued
! while the current one is still being folded, so statement order matters.
.globl	gcm_ghash_4bit
.align	32
gcm_ghash_4bit:
	save	%sp,-112,%sp
	ldub	[%i2+15],%l1
	ldub	[%i0+15],%l2
	ldub	[%i0+14],%l3
	add	%i3,%i2,%i3		! %i3 = end of input
	add	%i1,8,%l6

	! call .+8 leaves the address of label 1 in %o7 and resumes after
	! the delay slot; the add converts it to the address of rem_4bit.
1:	call	.+8
	add	%o7,rem_4bit-1b,%l4

.Louter:
	! byte 15: Z = table[low nibble of (Xi[15] ^ inp[15])]
	xor	%l2,%l1,%l1
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	sll	%l1,4,%l1
	ldx	[%l6+%l1],%o1
	ldx	[%i1+%l1],%o0

	ldub	[%i2+14],%l1

	ldx	[%l6+%l0],%o3
	and	%o1,0xf,%l5
	ldx	[%i1+%l0],%o2
	sll	%l5,3,%l5
	ldx	[%l4+%l5],%o4
	srlx	%o1,4,%o1
	mov	13,%l7
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1

	xor	%l3,%l1,%l1		! byte 14 of Xi ^ inp
	and	%o1,0xf,%l5
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	ba	.Lghash_inner
	sll	%l1,4,%l1		! delay slot
.align	32
.Lghash_inner:
	! first half: fold in the low-nibble entry
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i2+%l7],%l1		! next input byte
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	ldub	[%i0+%l7],%l3		! next Xi byte
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	! second half: fold in the high-nibble entry, split the next byte
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%l3,%l1,%l1
	srlx	%o0,4,%o0
	and	%l1,0xf0,%l0
	addcc	%l7,-1,%l7		! carry is set while the old index was > 0
	xor	%o1,%o5,%o1
	and	%l1,0x0f,%l1
	xor	%o3,%o1,%o1
	sll	%l1,4,%l1
	blu	.Lghash_inner
	and	%o1,0xf,%l5		! delay slot

	! byte 0, low nibble
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0

	add	%i2,16,%i2
	cmp	%i2,%i3
	be,pn	%icc,.Ldone
	and	%o1,0xf,%l5		! delay slot

	! byte 0, high nibble; more input follows
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i2+15],%l1		! byte 15 of the next block
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]
	srl	%o1,8,%l3		! new Xi[14] and Xi[15] taken from Z.lo
	and	%o1,0xff,%l2
	ba	.Louter
	and	%l3,0xff,%l3		! delay slot
.align	32
.Ldone:
	! byte 0, high nibble of the final block; store Xi and return
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]

	ret
	restore
.type	gcm_ghash_4bit,#function
.size	gcm_ghash_4bit,(.-gcm_ghash_4bit)

! gcm_gmult_4bit
!   %i0  Xi: 16 bytes, updated in place
!   %i1  table, same layout as for gcm_ghash_4bit
!
! Same nibble pipeline and register roles as gcm_ghash_4bit, except that
! the bytes come from Xi alone (no input is XORed in) and exactly one
! 16-byte value is processed.
.globl	gcm_gmult_4bit
.align	32
gcm_gmult_4bit:
	save	%sp,-112,%sp
	ldub	[%i0+15],%l1
	add	%i1,8,%l6

	! PC-relative address of rem_4bit, see gcm_ghash_4bit
1:	call	.+8
	add	%o7,rem_4bit-1b,%l4

	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	sll	%l1,4,%l1
	ldx	[%l6+%l1],%o1
	ldx	[%i1+%l1],%o0

	ldub	[%i0+14],%l1

	ldx	[%l6+%l0],%o3
	and	%o1,0xf,%l5
	ldx	[%i1+%l0],%o2
	sll	%l5,3,%l5
	ldx	[%l4+%l5],%o4
	srlx	%o1,4,%o1
	mov	13,%l7
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1

	and	%o1,0xf,%l5
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	ba	.Lgmult_inner
	sll	%l1,4,%l1		! delay slot
.align	32
.Lgmult_inner:
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i0+%l7],%l1		! next Xi byte
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	srlx	%o0,4,%o0
	and	%l1,0xf0,%l0
	addcc	%l7,-1,%l7		! carry is set while the old index was > 0
	xor	%o1,%o5,%o1
	and	%l1,0x0f,%l1
	xor	%o3,%o1,%o1
	sll	%l1,4,%l1
	blu	.Lgmult_inner
	and	%o1,0xf,%l5		! delay slot

	! byte 0, low nibble
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	! byte 0, high nibble; store Xi
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]

	ret
	restore
.type	gcm_gmult_4bit,#function
.size	gcm_gmult_4bit,(.-gcm_gmult_4bit)

! gcm_init_vis3
!   %i0  output, 24 bytes: "twisted" H (hi at +0, lo at +8) followed by
!        the 64-bit constant 0xA040608020C0E000 at +16
!   %i1  H: upper 64 bits at +0, lower 64 bits at +8
!
! Twisting is H << 1 (128-bit), and when the bit shifted out was set the
! result is XORed with (0xE1 << 57) in the upper half and 1 in the lower.
.globl	gcm_init_vis3
.align	32
gcm_init_vis3:
	save	%sp,-112,%sp

	ldx	[%i1+0],%o2
	ldx	[%i1+8],%o1
	mov	0xE1,%o4
	mov	1,%o3
	sllx	%o4,57,%o4
	srax	%o2,63,%g1		! broadcast carry
	addcc	%o1,%o1,%o1		! H<<=1
	.word	0x95b2822a	!addxc	%o2,%o2,%o2
	and	%g1,%o3,%o3
	and	%g1,%o4,%o4
	xor	%o3,%o1,%o1
	xor	%o4,%o2,%o2
	stx	%o1,[%i0+8]		! save twisted H
	stx	%o2,[%i0+0]

	sethi	%hi(0xA0406080),%g5
	sethi	%hi(0x20C0E000),%l0
	or	%g5,%lo(0xA0406080),%g5
	or	%l0,%lo(0x20C0E000),%l0
	sllx	%g5,32,%g5
	or	%l0,%g5,%g5		! (0xE0·i)&0xff=0xA040608020C0E000
	stx	%g5,[%i0+16]

	ret
	restore
.type	gcm_init_vis3,#function
.size	gcm_init_vis3,.-gcm_init_vis3

! gcm_gmult_vis3
!   %i0  Xi: 16 bytes (hi at +0, lo at +8), updated in place
!   %i1  24-byte table laid out as written by gcm_init_vis3
!
! One 128x128 carry-less multiplication done as three 64x64 products
! (Karatsuba), followed by reduction with the 0xE1<<57 constant.
.globl	gcm_gmult_vis3
.align	32
gcm_gmult_vis3:
	save	%sp,-112,%sp

	ldx	[%i0+8],%o3		! load Xi
	ldx	[%i0+0],%o4
	ldx	[%i1+8],%o1		! load twisted H
	ldx	[%i1+0],%o2

	mov	0xE1,%l7
	sllx	%l7,57,%o5		! 57 is not a typo
	ldx	[%i1+16],%g5		! (0xE0·i)&0xff=0xA040608020C0E000

	xor	%o2,%o1,%o0		! Karatsuba pre-processing
	.word	0x83b2e2a9	!xmulx	%o3,%o1,%g1
	xor	%o3,%o4,%g3		! Karatsuba pre-processing
	.word	0x85b0e2a8	!xmulx	%g3,%o0,%g2
	.word	0x97b2e2c9	!xmulxhi	%o3,%o1,%o3
	.word	0x87b0e2c8	!xmulxhi	%g3,%o0,%g3
	.word	0x89b322ca	!xmulxhi	%o4,%o2,%g4
	.word	0x99b322aa	!xmulx	%o4,%o2,%o4

	sll	%g1,3,%o7
	srlx	%g5,%o7,%o7		! ·0xE0 [implicit &(7<<3)]
	xor	%g1,%o7,%o7
	sllx	%o7,57,%o7		! (%g1·0xE1)<<1<<56 [implicit &0x7f]

	xor	%g1,%g2,%g2		! Karatsuba post-processing
	xor	%o3,%g3,%g3
	xor	%o7,%o3,%o3		! real destination is %g2
	xor	%g4,%g3,%g3
	xor	%o3,%g2,%g2
	xor	%o4,%g3,%g3
	xor	%o4,%g2,%g2

	.word	0x97b062cd	!xmulxhi	%g1,%o5,%o3	! ·0xE1<<1<<56
	xor	%g1,%g3,%g3
	.word	0x83b0a2ad	!xmulx	%g2,%o5,%g1
	xor	%g2,%g4,%g4
	.word	0x85b0a2cd	!xmulxhi	%g2,%o5,%g2

	xor	%o3,%g3,%g3
	xor	%g1,%g3,%g3
	xor	%g2,%g4,%g4

	stx	%g3,[%i0+8]		! save Xi
	stx	%g4,[%i0+0]

	ret
	restore
.type	gcm_gmult_vis3,#function
.size	gcm_gmult_vis3,.-gcm_gmult_vis3

! gcm_ghash_vis3
!   %i0  Xi: 16 bytes (hi at +0, lo at +8), updated in place
!   %i1  24-byte table laid out as written by gcm_init_vis3
!   %i2  input pointer, any alignment
!   %i3  byte count; the loop subtracts 16 per block and stops at zero,
!        so the count must be a non-zero multiple of 16
!
! Input is always fetched with aligned 8-byte loads.  For a misaligned
! pointer three words are loaded and merged with shifts:
!   %l0 = (inp & 7) * 8  left shift,  %l1 = -%l0  right shift
! (srlx uses only the low 6 bits of the count, so -%l0 acts as 64-%l0).
! The merge path therefore reads the aligned word at offset 16 as well.
.globl	gcm_ghash_vis3
.align	32
gcm_ghash_vis3:
	save	%sp,-112,%sp
	nop
	! Clear bits 63:32 of the count; needed on v8+.  This line used to
	! read "srln", which is not a SPARC mnemonic and has no definition
	! in this file; srl is the form that matches this 32-bit build.
	srl	%i3,0,%i3

	ldx	[%i0+8],%g3		! load Xi
	ldx	[%i0+0],%g4
	ldx	[%i1+8],%o1		! load twisted H
	ldx	[%i1+0],%o2

	mov	0xE1,%l7
	sllx	%l7,57,%o5		! 57 is not a typo
	ldx	[%i1+16],%g5		! (0xE0·i)&0xff=0xA040608020C0E000

	and	%i2,7,%l0
	andn	%i2,7,%i2
	sll	%l0,3,%l0
	prefetch	[%i2+63], 20
	sub	%g0,%l0,%l1

	xor	%o2,%o1,%o0		! Karatsuba pre-processing
.Loop:
	ldx	[%i2+8],%o3
	brz,pt	%l0,1f			! aligned input: skip the merge
	ldx	[%i2+0],%o4		! delay slot

	ldx	[%i2+16],%g2		! align data
	srlx	%o3,%l1,%g1
	sllx	%o3,%l0,%o3
	sllx	%o4,%l0,%o4
	srlx	%g2,%l1,%g2
	or	%g1,%o4,%o4
	or	%g2,%o3,%o3
1:
	add	%i2,16,%i2
	sub	%i3,16,%i3
	xor	%g3,%o3,%o3		! Xi ^= input block
	xor	%g4,%o4,%o4
	prefetch	[%i2+63], 20

	.word	0x83b2e2a9	!xmulx	%o3,%o1,%g1
	xor	%o3,%o4,%g3		! Karatsuba pre-processing
	.word	0x85b0e2a8	!xmulx	%g3,%o0,%g2
	.word	0x97b2e2c9	!xmulxhi	%o3,%o1,%o3
	.word	0x87b0e2c8	!xmulxhi	%g3,%o0,%g3
	.word	0x89b322ca	!xmulxhi	%o4,%o2,%g4
	.word	0x99b322aa	!xmulx	%o4,%o2,%o4

	sll	%g1,3,%o7
	srlx	%g5,%o7,%o7		! ·0xE0 [implicit &(7<<3)]
	xor	%g1,%o7,%o7
	sllx	%o7,57,%o7		! (%g1·0xE1)<<1<<56 [implicit &0x7f]

	xor	%g1,%g2,%g2		! Karatsuba post-processing
	xor	%o3,%g3,%g3
	xor	%o7,%o3,%o3		! real destination is %g2
	xor	%g4,%g3,%g3
	xor	%o3,%g2,%g2
	xor	%o4,%g3,%g3
	xor	%o4,%g2,%g2

	.word	0x97b062cd	!xmulxhi	%g1,%o5,%o3	! ·0xE1<<1<<56
	xor	%g1,%g3,%g3
	.word	0x83b0a2ad	!xmulx	%g2,%o5,%g1
	xor	%g2,%g4,%g4
	.word	0x85b0a2cd	!xmulxhi	%g2,%o5,%g2

	xor	%o3,%g3,%g3
	xor	%g1,%g3,%g3
	brnz,pt	%i3,.Loop
	xor	%g2,%g4,%g4		! delay slot

	stx	%g3,[%i0+8]		! save Xi
	stx	%g4,[%i0+0]

	ret
	restore
.type	gcm_ghash_vis3,#function
.size	gcm_ghash_vis3,.-gcm_ghash_vis3
.asciz	"GHASH for SPARCv9/VIS3, CRYPTOGAMS by <appro@openssl.org>"
.align	4