/*
 * ChaCha20 for 32-bit x86, GAS / AT&T syntax, assembled through cpp (.S).
 *
 * Exported entry points (arguments are passed on the stack):
 *
 *   ChaCha20_ctr32(out, in, len, key, counter)
 *   ChaCha20_ssse3(out, in, len, key, counter)
 *
 * After the four register pushes in each prologue the arguments sit at:
 *   20(%esp) out      - destination pointer
 *   24(%esp) in       - source pointer
 *   28(%esp) len      - byte count
 *   32(%esp) key      - pointer to 8 dwords
 *   36(%esp) counter  - pointer to 4 dwords (dword 0 is bumped per block)
 *
 * NOTE(review): the C prototype is presumably
 *   void f(uint8_t *out, const uint8_t *in, size_t len,
 *          const uint32_t key[8], const uint32_t counter[4]);
 * confirm against the declaring header.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored by both functions;
 * %eax, %ecx, %edx (and %xmm0-%xmm7 in the SSSE3 path) are clobbered.
 */
#include <machine/asm.h>
.text

/*
 * ChaCha20_ctr32: dispatcher plus scalar implementation.
 *
 * Returns immediately when len == 0.  Otherwise it tests two bits of
 * OPENSSL_ia32cap_P (bit 24 of dword 0 and bit 9 of dword 1; presumably
 * FXSR and SSSE3 - confirm against the ia32cap layout) and, if both are
 * set, jumps into ChaCha20_ssse3 at .Lssse3_shortcut with %eax still
 * holding the address of .Lpic_point.
 *
 * Scalar stack frame (after subl $132,%esp):
 *     0.. 60  working state x0..x15 (slot i at 4*i)
 *    80..108  copy of key[0..7]
 *   112..124  copy of counter[0..3]
 *   128       round-loop counter
 *   152       out pointer   (caller argument slot, updated per block)
 *   156       in pointer    (caller argument slot, updated per block)
 *   160       bytes left    (caller argument slot, updated per block)
 */
.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
#ifdef __CET__
	.byte	243,15,30,251			/* f3 0f 1e fb = endbr32 */
#endif
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax			/* len == 0 ? */
	je	.L000no_data
	call	.Lpic_point			/* PIC: fetch own address */
.Lpic_point:
	popl	%eax				/* %eax = &.Lpic_point */
	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
	testl	$16777216,(%ebp)		/* bit 24 of capability dword 0 */
	jz	.L001x86
	testl	$512,4(%ebp)			/* bit 9 of capability dword 1 */
	jz	.L001x86
	jmp	.Lssse3_shortcut		/* %eax must stay = &.Lpic_point */

.L001x86:
	movl	32(%esp),%esi			/* key pointer */
	movl	36(%esp),%edi			/* counter pointer */
	subl	$132,%esp
	/* copy key[0..3] to 80..92(%esp) */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	/* copy key[4..7] to 96..108(%esp) */
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	/* copy counter[0..3] to 112..124(%esp) */
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax				/* pre-decrement: .L002entry adds 1 back */
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry

.align	16
.L003outer_loop:
	/* write the advanced pointers / remaining length back to the arg slots */
	movl	%ebx,156(%esp)			/* in */
	movl	%eax,152(%esp)			/* out */
	movl	%ecx,160(%esp)			/* bytes left */
.L002entry:
	/*
	 * Build the initial state for one 64-byte block.  The four constants
	 * are 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, i.e. the ASCII
	 * bytes "expand 32-byte k" read as little-endian dwords.
	 * x0, x4, x8, x9 and x12 are kept in registers rather than stored.
	 */
	movl	$1634760805,%eax		/* x0 */
	movl	$857760878,4(%esp)		/* x1 */
	movl	$2036477234,8(%esp)		/* x2 */
	movl	$1797285236,12(%esp)		/* x3 */
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)			/* x5  = key[1] */
	movl	%ebp,24(%esp)			/* x6  = key[2] */
	movl	%ecx,40(%esp)			/* x10 = key[6] */
	movl	%esi,44(%esp)			/* x11 = key[7] */
	movl	%edx,52(%esp)			/* x13 = counter[1] */
	movl	%edi,56(%esp)			/* x14 = counter[2] */
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp			/* x4 = key[0] */
	movl	96(%esp),%ecx			/* x8 = key[4] */
	movl	100(%esp),%esi			/* x9 = key[5] */
	addl	$1,%edx				/* advance block counter; x12 */
	movl	%ebx,28(%esp)			/* x7  = key[3] */
	movl	%edi,60(%esp)			/* x15 = counter[3] */
	movl	%edx,112(%esp)			/* remember counter for this block */
	movl	$10,%ebx			/* 10 iterations of the loop below */
	jmp	.L004loop

.align	16
.L004loop:
	/*
	 * One iteration = eight quarter-rounds (add / xor / rotate by
	 * 16, 12, 8, 7).  The first four act on (x0,x4,x8,x12),
	 * (x1,x5,x9,x13), (x2,x6,x10,x14), (x3,x7,x11,x15); the second four
	 * on (x0,x5,x10,x15), (x1,x6,x11,x12), (x2,x7,x8,x13), (x3,x4,x9,x14).
	 * Loads and stores of neighbouring quarter-rounds are interleaved,
	 * so statement order here is significant.
	 */
	addl	%ebp,%eax			/* x0 += x4 */
	movl	%ebx,128(%esp)			/* park the round counter */
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax			/* x1 += x5 */
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax			/* x2 += x6 */
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax			/* x3 += x7 */
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax			/* second half begins: x0 += x5 */
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax			/* x1 += x6 */
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax			/* x2 += x7 */
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax			/* x3 += x4 */
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx			/* reload the round counter */
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop

	/*
	 * Rounds done.  Live in registers: %eax=x0, %ebp=x4, %ecx=x8,
	 * %esi=x9, %edx=x12, %edi=x14.  Add the initial state back in.
	 */
	movl	160(%esp),%ebx			/* bytes left */
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail			/* fewer than 64 bytes: partial block */

	/* full block: XOR 64 bytes of keystream with in, store to out */
	movl	156(%esp),%ebx			/* %ebx = in */
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)			/* hold word 0; %eax is reused as out */
	movl	152(%esp),%eax			/* %eax = out */
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx			/* in += 64 */
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp			/* word 0 parked above */
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax			/* out += 64 */
	subl	$64,%ecx			/* bytes left -= 64 */
	jnz	.L003outer_loop
	jmp	.L006done

.L005tail:
	/*
	 * Partial final block: finish the keystream block in 0..60(%esp),
	 * then XOR it into the output one byte at a time.
	 * %ebx still holds the number of bytes left (1..63).
	 */
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp			/* %ebp = in */
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx			/* %ecx = out */
	movl	%esi,44(%esp)
	xorl	%esi,%esi			/* %esi = byte index */
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	movb	(%esi,%ebp,1),%al		/* input byte */
	movb	(%esp,%esi,1),%dl		/* keystream byte */
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)		/* out[index] (index already bumped) */
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin

/*
 * ChaCha20_ssse3: one-block-at-a-time SSE2/SSSE3 implementation.
 *
 * Reached either by a direct call or from ChaCha20_ctr32 through
 * .Lssse3_shortcut.  Invariant at .Lssse3_shortcut: the four callee-saved
 * registers are pushed and %eax holds the address of .Lpic_point (it is
 * used as the base for addressing .Lssse3_data).  The direct-entry
 * prologue establishes that invariant itself and returns early when
 * len == 0, mirroring the guard in ChaCha20_ctr32; without these a
 * direct call would address the constant table through an undefined
 * %eax, and len == 0 would underflow the byte counter in the tail loop.
 * The caller is responsible for ensuring the CPU supports the
 * instructions used here.
 *
 * Register roles after the prologue:
 *   %edi = out, %esi = in, %ecx = bytes left, %edx = key (then the round
 *   counter), %ebx = counter pointer, %ebp = caller's %esp.
 * Stack frame (64-byte aligned):
 *     0..63   initial state rows (constants, key lo, key hi, counter row);
 *             reused as the keystream buffer in the tail path
 *   512       saved caller %esp
 */
.globl	ChaCha20_ssse3
.type	ChaCha20_ssse3,@function
.align	16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
#ifdef __CET__
	.byte	243,15,30,251			/* f3 0f 1e fb = endbr32 */
#endif
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax			/* len == 0 ? nothing to do */
	je	.Lssse3_no_data
	call	.Lssse3_pic_point		/* PIC: fetch own address */
.Lssse3_pic_point:
	popl	%eax
	leal	.Lpic_point-.Lssse3_pic_point(%eax),%eax	/* rebase to &.Lpic_point */
.Lssse3_shortcut:
	movl	20(%esp),%edi			/* out */
	movl	24(%esp),%esi			/* in */
	movl	28(%esp),%ecx			/* len */
	movl	32(%esp),%edx			/* key */
	movl	36(%esp),%ebx			/* counter */
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp			/* 64-byte align the frame */
	movl	%ebp,512(%esp)			/* saved for the epilogue */
	leal	.Lssse3_data-.Lpic_point(%eax),%eax	/* %eax = &.Lssse3_data */
	movdqu	(%ebx),%xmm3			/* row 3: counter block */
.L0081x:
	movdqa	32(%eax),%xmm0			/* row 0: constants */
	movdqu	(%edx),%xmm1			/* row 1: key[0..3] */
	movdqu	16(%edx),%xmm2			/* row 2: key[4..7] */
	movdqa	(%eax),%xmm6			/* pshufb mask: rotate each dword left 16 */
	movdqa	16(%eax),%xmm7			/* pshufb mask: rotate each dword left 8 */
	movl	%ebp,48(%esp)			/* overwritten by the movdqa to 48(%esp) below */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx			/* 10 iterations of the loop below */
	jmp	.L009loop1x

.align	16
.L010outer1x:
	/* next block: reload rows 0-2, add {1,0,0,0} to the saved counter row */
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L009loop1x

.align	16
.L009loop1x:
	/* first half: four quarter-rounds in parallel, one per dword lane */
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	.byte	102,15,56,0,222			/* pshufb %xmm6,%xmm3 : rotl 16 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1			/* rotl 12 */
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	.byte	102,15,56,0,223			/* pshufb %xmm7,%xmm3 : rotl 8 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1			/* rotl 7 */
	/* permute the lanes of rows 1-3 so the second half works on diagonals */
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	/* second half: same quarter-round on the permuted rows */
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	.byte	102,15,56,0,222			/* pshufb %xmm6,%xmm3 : rotl 16 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1			/* rotl 12 */
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	.byte	102,15,56,0,223			/* pshufb %xmm7,%xmm3 : rotl 8 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1			/* rotl 7 */
	/* undo the lane permutation */
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L009loop1x

	/* add the initial state back in */
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L011tail			/* fewer than 64 bytes: partial block */

	/* full block: XOR 64 bytes of keystream with in, store to out */
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi			/* in += 64 */
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi			/* out += 64 */
	subl	$64,%ecx			/* bytes left -= 64 */
	jnz	.L010outer1x
	jmp	.L012done

.L011tail:
	/* partial final block: spill the keystream, then XOR byte by byte */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp			/* %ebp = byte index */
.L013tail_loop:
	movb	(%esp,%ebp,1),%al		/* keystream byte */
	movb	(%esi,%ebp,1),%dl		/* input byte */
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)		/* out[index] (index already bumped) */
	decl	%ecx
	jnz	.L013tail_loop
.L012done:
	movl	512(%esp),%esp			/* drop the aligned frame */
.Lssse3_no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin

/*
 * Constant table for ChaCha20_ssse3, addressed relative to .Lpic_point.
 * Offsets 0, 16, 32 and 80 are the ones referenced by the code in this file.
 */
.align	64
.Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13	/*   0: pshufb mask, rotl 16 */
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14	/*  16: pshufb mask, rotl 8 */
.long	1634760805,857760878,2036477234,1797285236	/*  32: row 0 constants */
.long	0,1,2,3					/*  48 */
.long	4,4,4,4					/*  64 */
.long	1,0,0,0					/*  80: per-block counter increment */
.long	4,0,0,0					/*  96 */
.long	0,-1,-1,-1				/* 112 */
.align	64
/* ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated */
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.comm	OPENSSL_ia32cap_P,16,4

	/* ELF property note: type 5, owner "GNU", property 0xc0000002 with value 3 */
	.section ".note.gnu.property", "a"
	.p2align 2
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	.asciz "GNU"
1:
	.p2align 2
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 2
4: