Lines Matching full:have
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-SHLD,X64-NO-BMI2-HAVE-SHLD
4 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-NO-SHLD,X64-HAVE-BMI2-NO-SHLD
5 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-SHLD,X64-HAVE-BMI2-HAVE-SHLD
7 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-NO-BMI2,X86-SHLD,X86-NO-BMI2-HAVE-SHLD
8 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-BMI2,X86-NO-SHLD,X86-HAVE-BMI2-NO-SHLD
9 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-BMI2,X86-SHLD,X86-HAVE-BMI2-HAVE-SHLD
214 ; X86-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca:
215 ; X86-NO-BMI2-HAVE-SHLD: # %bb.0:
216 ; X86-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
217 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
218 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
219 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
220 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
221 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
222 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
223 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
224 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
225 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi
226 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx
227 ; X86-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
228 ; X86-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx
229 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, (%eax)
230 ; X86-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
231 ; X86-NO-BMI2-HAVE-SHLD-NEXT: retl
233 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca:
234 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
235 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
236 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
237 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
238 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
239 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
240 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
241 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
242 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
243 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx
244 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
245 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
246 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx
247 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
248 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
249 ; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
250 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi
251 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi
252 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %edx
253 ; X86-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
254 ; X86-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx
255 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movb %dl, (%eax)
256 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
257 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
258 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
259 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
261 ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca:
262 ; X86-HAVE-BMI2-HAVE-SHLD: # %bb.0:
263 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx
264 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
265 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
266 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
267 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
268 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
269 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
270 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
271 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
272 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
273 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
274 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %ebx
275 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
276 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx
277 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax)
278 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
279 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx
280 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: retl
338 ; X86-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca:
339 ; X86-NO-BMI2-HAVE-SHLD: # %bb.0:
340 ; X86-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
341 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
342 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
343 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
344 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
345 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
346 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
347 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
348 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
349 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
350 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi
351 ; X86-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
352 ; X86-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi
353 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%eax)
354 ; X86-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
355 ; X86-NO-BMI2-HAVE-SHLD-NEXT: retl
357 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca:
358 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
359 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
360 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
361 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
362 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
363 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
364 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
365 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
366 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
367 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx
368 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
369 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
370 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx
371 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
372 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
373 ; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
374 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi
375 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi
376 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %edx
377 ; X86-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
378 ; X86-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx
379 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movw %dx, (%eax)
380 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
381 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
382 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
383 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
385 ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca:
386 ; X86-HAVE-BMI2-HAVE-SHLD: # %bb.0:
387 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
388 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
389 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
390 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
391 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
392 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
393 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
394 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
395 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
396 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
397 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi
398 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
399 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi
400 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movw %si, (%eax)
401 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
402 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: retl
459 ; X86-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca:
460 ; X86-NO-BMI2-HAVE-SHLD: # %bb.0:
461 ; X86-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
462 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
463 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
464 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
465 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
466 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
467 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
468 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
469 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
470 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
471 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi
472 ; X86-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
473 ; X86-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi
474 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax)
475 ; X86-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
476 ; X86-NO-BMI2-HAVE-SHLD-NEXT: retl
478 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca:
479 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
480 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
481 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
482 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
483 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
484 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
485 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
486 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
487 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
488 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx
489 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
490 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
491 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx
492 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
493 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
494 ; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
495 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi
496 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi
497 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %edx
498 ; X86-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
499 ; X86-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx
500 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax)
501 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
502 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
503 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
504 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
506 ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca:
507 ; X86-HAVE-BMI2-HAVE-SHLD: # %bb.0:
508 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
509 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
510 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
511 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
512 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
513 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
514 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
515 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
516 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
517 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
518 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi
519 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
520 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi
521 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax)
522 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
523 ; X86-HAVE-BMI2-HAVE-SHLD-NEXT: retl
557 ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
558 ; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
559 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
560 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
561 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
562 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
563 ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
564 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
565 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax
566 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi
567 ; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
568 ; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
569 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movb %sil, (%rdx)
570 ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
572 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
573 ; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
574 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
575 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
576 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
577 ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
578 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
579 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
580 ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
581 ; X64-HAVE-BMI2-NO-SHLD-NEXT: leal (%rcx,%rcx), %r8d
582 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
583 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
584 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
585 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
586 ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
587 ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx
588 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%rdx)
589 ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
591 ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
592 ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
593 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
594 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
595 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
596 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
597 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
598 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
599 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax
600 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi
601 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
602 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
603 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %sil, (%rdx)
604 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
660 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
661 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
662 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
663 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $40, %esp
664 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
665 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
666 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
667 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
668 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
669 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm1, %xmm1
670 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
671 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
672 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
673 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %dl
674 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $12, %dl
675 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
676 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx), %esi
677 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
678 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %edx
679 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
680 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
681 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
682 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%eax)
683 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $40, %esp
684 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
685 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
719 ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
720 ; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
721 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
722 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
723 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
724 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
725 ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
726 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
727 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax
728 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi
729 ; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
730 ; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
731 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%rdx)
732 ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
734 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
735 ; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
736 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
737 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
738 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
739 ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
740 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
741 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
742 ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
743 ; X64-HAVE-BMI2-NO-SHLD-NEXT: leal (%rcx,%rcx), %r8d
744 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
745 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
746 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
747 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
748 ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
749 ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx
750 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%rdx)
751 ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
753 ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
754 ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
755 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
756 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
757 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
758 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
759 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
760 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
761 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax
762 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi
763 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
764 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
765 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %si, (%rdx)
766 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
822 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
823 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
824 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
825 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $40, %esp
826 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
827 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
828 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
829 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
830 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
831 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm1, %xmm1
832 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
833 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
834 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
835 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %dl
836 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $12, %dl
837 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
838 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx), %esi
839 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
840 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %edx
841 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
842 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
843 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
844 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%eax)
845 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $40, %esp
846 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
847 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
880 ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
881 ; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
882 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
883 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
884 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
885 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
886 ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
887 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
888 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax
889 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi
890 ; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
891 ; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
892 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%rdx)
893 ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
895 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
896 ; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
897 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
898 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
899 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
900 ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
901 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
902 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
903 ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
904 ; X64-HAVE-BMI2-NO-SHLD-NEXT: leal (%rcx,%rcx), %r8d
905 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
906 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
907 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
908 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
909 ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
910 ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx
911 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%rdx)
912 ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
914 ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
915 ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
916 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
917 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
918 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
919 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
920 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
921 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
922 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax
923 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi
924 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
925 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
926 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%rdx)
927 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
983 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
984 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
985 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
986 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $40, %esp
987 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
988 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
989 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
990 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
991 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
992 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm1, %xmm1
993 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
994 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
995 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
996 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %dl
997 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $12, %dl
998 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
999 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx), %esi
1000 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1001 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %edx
1002 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
1003 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
1004 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1005 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax)
1006 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $40, %esp
1007 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
1008 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
1041 ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
1042 ; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
1043 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
1044 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
1045 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
1046 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
1047 ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1048 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
1049 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax
1050 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi
1051 ; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
1052 ; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
1053 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx)
1054 ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
1056 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
1057 ; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
1058 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
1059 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
1060 ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1061 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
1062 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
1063 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
1064 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
1065 ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
1066 ; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
1067 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
1068 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi
1069 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
1070 ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
1071 ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax
1072 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, (%rdx)
1073 ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
1075 ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
1076 ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
1077 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
1078 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
1079 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
1080 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
1081 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1082 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
1083 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax
1084 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi
1085 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
1086 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
1087 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx)
1088 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
1168 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
1169 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
1170 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
1171 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
1172 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
1173 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
1174 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $44, %esp
1175 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1176 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1177 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1178 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1179 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1180 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm1, %xmm1
1181 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1182 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1183 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1184 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %dl
1185 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $12, %dl
1186 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1187 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx), %esi
1188 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
1189 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
1190 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %edi
1191 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%esp,%edx), %ebp
1192 ; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx
1193 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %edx
1194 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx
1195 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %esi
1196 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl
1197 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1198 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %ebp, %ebp
1199 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx
1200 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1201 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax)
1202 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax)
1203 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $44, %esp
1204 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
1205 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
1206 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
1207 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
1208 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
1317 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca:
1318 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
1319 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
1320 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $72, %esp
1321 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1322 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1323 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1324 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1325 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%edx), %xmm1
1326 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1327 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1328 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1329 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1330 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1331 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1332 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1333 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $5, %dl
1334 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1335 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx,4), %esi
1336 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1337 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %edx
1338 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
1339 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
1340 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1341 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%eax)
1342 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $72, %esp
1343 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
1344 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
1465 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca:
1466 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
1467 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
1468 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $72, %esp
1469 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1470 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1471 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1472 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1473 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%edx), %xmm1
1474 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1475 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1476 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1477 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1478 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1479 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1480 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1481 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $5, %dl
1482 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1483 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx,4), %esi
1484 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1485 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %edx
1486 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
1487 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
1488 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1489 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%eax)
1490 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $72, %esp
1491 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
1492 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
1612 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca:
1613 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
1614 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
1615 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $72, %esp
1616 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1617 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1618 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1619 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1620 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%edx), %xmm1
1621 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1622 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1623 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1624 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1625 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1626 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1627 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1628 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $5, %dl
1629 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1630 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx,4), %esi
1631 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1632 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %edx
1633 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
1634 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
1635 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1636 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax)
1637 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $72, %esp
1638 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
1639 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
1696 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca:
1697 ; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
1698 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movups (%rdi), %xmm0
1699 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%rdi), %xmm1
1700 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
1701 ; X64-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1702 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1703 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1704 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1705 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1706 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax
1707 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrb $6, %al
1708 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl %al, %eax
1709 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, -72(%rsp,%rax,8), %rcx
1710 ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %sil
1711 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -64(%rsp,%rax,8), %rax
1712 ; X64-HAVE-BMI2-NO-SHLD-NEXT: addq %rax, %rax
1713 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rsi, %rax, %rax
1714 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rax
1715 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, (%rdx)
1716 ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
1800 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca:
1801 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
1802 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
1803 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
1804 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
1805 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
1806 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $76, %esp
1807 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1808 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1809 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1810 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1811 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%edx), %xmm1
1812 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1813 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1814 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1815 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1816 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1817 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1818 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1819 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $5, %dl
1820 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1821 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx,4), %esi
1822 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
1823 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
1824 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %edi
1825 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%esp,%edx,4), %ebp
1826 ; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx
1827 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %edx
1828 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx
1829 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %esi
1830 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl
1831 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1832 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %ebp, %ebp
1833 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx
1834 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1835 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax)
1836 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax)
1837 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $76, %esp
1838 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
1839 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
1840 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
1841 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
1842 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
1889 ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca:
1890 ; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
1891 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movups (%rdi), %xmm0
1892 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movups 16(%rdi), %xmm1
1893 ; X64-NO-BMI2-HAVE-SHLD-NEXT: leal (,%rsi,8), %eax
1894 ; X64-NO-BMI2-HAVE-SHLD-NEXT: xorps %xmm2, %xmm2
1895 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1896 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1897 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1898 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1899 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
1900 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrb $6, %cl
1901 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl %cl, %esi
1902 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -72(%rsp,%rsi,8), %rdi
1903 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -64(%rsp,%rsi,8), %r8
1904 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r9
1905 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
1906 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r9
1907 ; X64-NO-BMI2-HAVE-SHLD-NEXT: notb %cl
1908 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -56(%rsp,%rsi,8), %rsi
1909 ; X64-NO-BMI2-HAVE-SHLD-NEXT: addq %rsi, %rsi
1910 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %rsi
1911 ; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r9, %rsi
1912 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
1913 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi
1914 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx)
1915 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, 8(%rdx)
1916 ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
1918 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca:
1919 ; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
1920 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movups (%rdi), %xmm0
1921 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%rdi), %xmm1
1922 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
1923 ; X64-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1924 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1925 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1926 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1927 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1928 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax
1929 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrb $6, %al
1930 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl %al, %eax
1931 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, -72(%rsp,%rax,8), %rcx
1932 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -64(%rsp,%rax,8), %rdi
1933 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rdi, %r8
1934 ; X64-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi
1935 ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %sil
1936 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -56(%rsp,%rax,8), %rax
1937 ; X64-HAVE-BMI2-NO-SHLD-NEXT: addq %rdi, %rdi
1938 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rsi, %rdi, %rdi
1939 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi
1940 ; X64-HAVE-BMI2-NO-SHLD-NEXT: addq %rax, %rax
1941 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rsi, %rax, %rax
1942 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %rax
1943 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, 8(%rdx)
1944 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx)
1945 ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
1947 ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca:
1948 ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
1949 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movups (%rdi), %xmm0
1950 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movups 16(%rdi), %xmm1
1951 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal (,%rsi,8), %ecx
1952 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorps %xmm2, %xmm2
1953 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1954 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1955 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1956 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1957 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax
1958 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrb $6, %al
1959 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl %al, %eax
1960 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -72(%rsp,%rax,8), %rsi
1961 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -64(%rsp,%rax,8), %rdi
1962 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rdi, %r8
1963 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %r9d
1964 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: notb %r9b
1965 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -56(%rsp,%rax,8), %rax
1966 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: addq %rax, %rax
1967 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rax, %rax
1968 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r8, %rax
1969 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $rcx
1970 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rsi
1971 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx)
1972 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, 8(%rdx)
1973 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
2086 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca:
2087 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
2088 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
2089 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
2090 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
2091 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
2092 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $92, %esp
2093 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
2094 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
2095 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%ecx), %xmm0
2096 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%ecx), %xmm1
2097 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax
2098 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
2099 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
2100 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
2101 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
2102 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
2103 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
2104 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $5, %cl
2105 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %cl, %ecx
2106 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, 16(%esp,%ecx,4), %edi
2107 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%esp,%ecx,4), %esi
2108 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %edx
2109 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2110 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
2111 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx
2112 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
2113 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi
2114 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %ebp
2115 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp
2116 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%esp,%ecx,4), %esi
2117 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl
2118 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorb $31, %bl
2119 ; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
2120 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi
2121 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
2122 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %esi
2123 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2124 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%esp,%ecx,4), %esi
2125 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %eax
2126 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi
2127 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx
2128 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
2129 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 32(%esp,%ecx,4), %ecx
2130 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %ecx, %ecx
2131 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %ecx
2132 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx
2133 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
2134 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax)
2135 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%eax)
2136 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax)
2137 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax)
2138 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $92, %esp
2139 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
2140 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
2141 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
2142 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
2143 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl