Lines Matching full:no

2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-NO-SHLD,X64-NO-BMI2-NO-SHLD
3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-SHLD,X64-NO-BMI2-HAVE-SHLD
4 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-NO-SHLD,X64-HAVE-BMI2-NO-SHLD
6 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-NO-BMI2,X86-NO-SHLD,X86-NO-BMI2-NO-SHLD
7 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-NO-BMI2,X86-SHLD,X86-NO-BMI2-HAVE-SHLD
8 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-BMI2,X86-NO-SHLD,X86-HAVE-BMI2-NO-SHLD
11 ; no @load_1byte_chunk_of_1byte_alloca
14 ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_2byte_alloca:
15 ; X64-NO-BMI2: # %bb.0:
16 ; X64-NO-BMI2-NEXT: movzwl (%rdi), %eax
17 ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
18 ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
19 ; X64-NO-BMI2-NEXT: shrl %cl, %eax
20 ; X64-NO-BMI2-NEXT: movb %al, (%rdx)
21 ; X64-NO-BMI2-NEXT: retq
31 ; X86-NO-BMI2-LABEL: load_1byte_chunk_of_2byte_alloca:
32 ; X86-NO-BMI2: # %bb.0:
33 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
34 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
35 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
36 ; X86-NO-BMI2-NEXT: movzwl (%eax), %eax
37 ; X86-NO-BMI2-NEXT: shll $3, %ecx
38 ; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
39 ; X86-NO-BMI2-NEXT: shrl %cl, %eax
40 ; X86-NO-BMI2-NEXT: movb %al, (%edx)
41 ; X86-NO-BMI2-NEXT: retl
65 ; no @load_2byte_chunk_of_2byte_alloca
68 ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca:
69 ; X64-NO-BMI2: # %bb.0:
70 ; X64-NO-BMI2-NEXT: movl (%rdi), %eax
71 ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
72 ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
73 ; X64-NO-BMI2-NEXT: shrl %cl, %eax
74 ; X64-NO-BMI2-NEXT: movb %al, (%rdx)
75 ; X64-NO-BMI2-NEXT: retq
84 ; X86-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca:
85 ; X86-NO-BMI2: # %bb.0:
86 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
87 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
88 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
89 ; X86-NO-BMI2-NEXT: movl (%eax), %eax
90 ; X86-NO-BMI2-NEXT: shll $3, %ecx
91 ; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
92 ; X86-NO-BMI2-NEXT: shrl %cl, %eax
93 ; X86-NO-BMI2-NEXT: movb %al, (%edx)
94 ; X86-NO-BMI2-NEXT: retl
118 ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca:
119 ; X64-NO-BMI2: # %bb.0:
120 ; X64-NO-BMI2-NEXT: movl (%rdi), %eax
121 ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
122 ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
123 ; X64-NO-BMI2-NEXT: shrl %cl, %eax
124 ; X64-NO-BMI2-NEXT: movw %ax, (%rdx)
125 ; X64-NO-BMI2-NEXT: retq
134 ; X86-NO-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca:
135 ; X86-NO-BMI2: # %bb.0:
136 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
137 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
138 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
139 ; X86-NO-BMI2-NEXT: movl (%edx), %edx
140 ; X86-NO-BMI2-NEXT: shll $3, %ecx
141 ; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
142 ; X86-NO-BMI2-NEXT: shrl %cl, %edx
143 ; X86-NO-BMI2-NEXT: movw %dx, (%eax)
144 ; X86-NO-BMI2-NEXT: retl
166 ; no @load_4byte_chunk_of_4byte_alloca
169 ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca:
170 ; X64-NO-BMI2: # %bb.0:
171 ; X64-NO-BMI2-NEXT: movq (%rdi), %rax
172 ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
173 ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
174 ; X64-NO-BMI2-NEXT: shrq %cl, %rax
175 ; X64-NO-BMI2-NEXT: movb %al, (%rdx)
176 ; X64-NO-BMI2-NEXT: retq
185 ; X86-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca:
186 ; X86-NO-BMI2-NO-SHLD: # %bb.0:
187 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
188 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %edi
189 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
190 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
191 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
192 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
193 ; X86-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
194 ; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax
195 ; X86-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
196 ; X86-NO-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
197 ; X86-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx
198 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
199 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
200 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
201 ; X86-NO-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %edi
202 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
203 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi
204 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
205 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
206 ; X86-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
207 ; X86-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx
208 ; X86-NO-BMI2-NO-SHLD-NEXT: movb %bl, (%edx)
209 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
210 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %edi
211 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %ebx
212 ; X86-NO-BMI2-NO-SHLD-NEXT: retl
214 ; X86-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca:
215 ; X86-NO-BMI2-HAVE-SHLD: # %bb.0:
216 ; X86-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
217 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
218 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
219 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
220 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
221 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
222 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
223 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
224 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
225 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi
226 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx
227 ; X86-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
228 ; X86-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx
229 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, (%eax)
230 ; X86-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
231 ; X86-NO-BMI2-HAVE-SHLD-NEXT: retl
233 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca:
234 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
235 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
236 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
237 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
238 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
239 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
240 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
241 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
242 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
243 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx
244 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
245 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
246 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx
247 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
248 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
249 ; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
250 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi
251 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi
252 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %edx
253 ; X86-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
254 ; X86-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx
255 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movb %dl, (%eax)
256 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
257 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
258 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
259 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
293 ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca:
294 ; X64-NO-BMI2: # %bb.0:
295 ; X64-NO-BMI2-NEXT: movq (%rdi), %rax
296 ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
297 ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
298 ; X64-NO-BMI2-NEXT: shrq %cl, %rax
299 ; X64-NO-BMI2-NEXT: movw %ax, (%rdx)
300 ; X64-NO-BMI2-NEXT: retq
309 ; X86-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca:
310 ; X86-NO-BMI2-NO-SHLD: # %bb.0:
311 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
312 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %edi
313 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
314 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
315 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
316 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
317 ; X86-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
318 ; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax
319 ; X86-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edi
320 ; X86-NO-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
321 ; X86-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
322 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
323 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
324 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
325 ; X86-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx
326 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
327 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx
328 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
329 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
330 ; X86-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
331 ; X86-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi
332 ; X86-NO-BMI2-NO-SHLD-NEXT: movw %si, (%edx)
333 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
334 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %edi
335 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %ebx
336 ; X86-NO-BMI2-NO-SHLD-NEXT: retl
338 ; X86-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca:
339 ; X86-NO-BMI2-HAVE-SHLD: # %bb.0:
340 ; X86-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
341 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
342 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
343 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
344 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
345 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
346 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
347 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
348 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
349 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
350 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi
351 ; X86-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
352 ; X86-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi
353 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%eax)
354 ; X86-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
355 ; X86-NO-BMI2-HAVE-SHLD-NEXT: retl
357 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca:
358 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
359 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
360 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
361 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
362 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
363 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
364 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
365 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
366 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
367 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx
368 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
369 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
370 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx
371 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
372 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
373 ; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
374 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi
375 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi
376 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %edx
377 ; X86-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
378 ; X86-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx
379 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movw %dx, (%eax)
380 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
381 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
382 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
383 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
414 ; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca:
415 ; X64-NO-BMI2: # %bb.0:
416 ; X64-NO-BMI2-NEXT: movq (%rdi), %rax
417 ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
418 ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
419 ; X64-NO-BMI2-NEXT: shrq %cl, %rax
420 ; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
421 ; X64-NO-BMI2-NEXT: retq
430 ; X86-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca:
431 ; X86-NO-BMI2-NO-SHLD: # %bb.0:
432 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
433 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %edi
434 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
435 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
436 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
437 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
438 ; X86-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
439 ; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax
440 ; X86-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edi
441 ; X86-NO-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
442 ; X86-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
443 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
444 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
445 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
446 ; X86-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx
447 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
448 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx
449 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
450 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
451 ; X86-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
452 ; X86-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi
453 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%edx)
454 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
455 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %edi
456 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %ebx
457 ; X86-NO-BMI2-NO-SHLD-NEXT: retl
459 ; X86-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca:
460 ; X86-NO-BMI2-HAVE-SHLD: # %bb.0:
461 ; X86-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
462 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
463 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
464 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
465 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
466 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
467 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
468 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
469 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
470 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
471 ; X86-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi
472 ; X86-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
473 ; X86-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi
474 ; X86-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax)
475 ; X86-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
476 ; X86-NO-BMI2-HAVE-SHLD-NEXT: retl
478 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca:
479 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
480 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
481 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
482 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
483 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
484 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
485 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
486 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
487 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
488 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx
489 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
490 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
491 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx
492 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
493 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
494 ; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
495 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi
496 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi
497 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %edx
498 ; X86-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
499 ; X86-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx
500 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax)
501 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
502 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
503 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
504 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
534 ; no @load_8byte_chunk_of_8byte_alloca
537 ; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
538 ; X64-NO-BMI2-NO-SHLD: # %bb.0:
539 ; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
540 ; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
541 ; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
542 ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
543 ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
544 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
545 ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi
546 ; X64-NO-BMI2-NO-SHLD-NEXT: leal (%rax,%rax), %r8d
547 ; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
548 ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8
549 ; X64-NO-BMI2-NO-SHLD-NEXT: orl %edi, %r8d
550 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
551 ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
552 ; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
553 ; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax
554 ; X64-NO-BMI2-NO-SHLD-NEXT: movb %al, (%rdx)
555 ; X64-NO-BMI2-NO-SHLD-NEXT: retq
557 ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
558 ; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
559 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
560 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
561 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
562 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
563 ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
564 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
565 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax
566 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi
567 ; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
568 ; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
569 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movb %sil, (%rdx)
570 ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
572 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
573 ; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
574 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
575 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
576 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
577 ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
578 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
579 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
580 ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
581 ; X64-HAVE-BMI2-NO-SHLD-NEXT: leal (%rcx,%rcx), %r8d
582 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
583 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
584 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
585 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
586 ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
587 ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx
588 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%rdx)
589 ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
606 ; X86-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
607 ; X86-NO-BMI2-NO-SHLD: # %bb.0:
608 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
609 ; X86-NO-BMI2-NO-SHLD-NEXT: subl $40, %esp
610 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
611 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
612 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
613 ; X86-NO-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
614 ; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx
615 ; X86-NO-BMI2-NO-SHLD-NEXT: xorps %xmm1, %xmm1
616 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
617 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
618 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
619 ; X86-NO-BMI2-NO-SHLD-NEXT: shrb $3, %dl
620 ; X86-NO-BMI2-NO-SHLD-NEXT: andb $12, %dl
621 ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
622 ; X86-NO-BMI2-NO-SHLD-NEXT: movl (%esp,%edx), %esi
623 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %edx
624 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
625 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
626 ; X86-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx
627 ; X86-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx
628 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
629 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx
630 ; X86-NO-BMI2-NO-SHLD-NEXT: movb %dl, (%eax)
631 ; X86-NO-BMI2-NO-SHLD-NEXT: addl $40, %esp
632 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
633 ; X86-NO-BMI2-NO-SHLD-NEXT: retl
660 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
661 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
662 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
663 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $40, %esp
664 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
665 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
666 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
667 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
668 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
669 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm1, %xmm1
670 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
671 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
672 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
673 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %dl
674 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $12, %dl
675 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
676 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx), %esi
677 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
678 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %edx
679 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
680 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
681 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
682 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%eax)
683 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $40, %esp
684 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
685 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
699 ; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
700 ; X64-NO-BMI2-NO-SHLD: # %bb.0:
701 ; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
702 ; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
703 ; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
704 ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
705 ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
706 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
707 ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi
708 ; X64-NO-BMI2-NO-SHLD-NEXT: leal (%rax,%rax), %r8d
709 ; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
710 ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8
711 ; X64-NO-BMI2-NO-SHLD-NEXT: orl %edi, %r8d
712 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
713 ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
714 ; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
715 ; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax
716 ; X64-NO-BMI2-NO-SHLD-NEXT: movw %ax, (%rdx)
717 ; X64-NO-BMI2-NO-SHLD-NEXT: retq
719 ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
720 ; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
721 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
722 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
723 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
724 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
725 ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
726 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
727 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax
728 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi
729 ; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
730 ; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
731 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%rdx)
732 ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
734 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
735 ; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
736 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
737 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
738 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
739 ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
740 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
741 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
742 ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
743 ; X64-HAVE-BMI2-NO-SHLD-NEXT: leal (%rcx,%rcx), %r8d
744 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
745 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
746 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
747 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
748 ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
749 ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx
750 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%rdx)
751 ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
768 ; X86-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
769 ; X86-NO-BMI2-NO-SHLD: # %bb.0:
770 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
771 ; X86-NO-BMI2-NO-SHLD-NEXT: subl $40, %esp
772 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
773 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
774 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
775 ; X86-NO-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
776 ; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx
777 ; X86-NO-BMI2-NO-SHLD-NEXT: xorps %xmm1, %xmm1
778 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
779 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
780 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
781 ; X86-NO-BMI2-NO-SHLD-NEXT: shrb $3, %dl
782 ; X86-NO-BMI2-NO-SHLD-NEXT: andb $12, %dl
783 ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
784 ; X86-NO-BMI2-NO-SHLD-NEXT: movl (%esp,%edx), %esi
785 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %edx
786 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
787 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
788 ; X86-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx
789 ; X86-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx
790 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
791 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx
792 ; X86-NO-BMI2-NO-SHLD-NEXT: movw %dx, (%eax)
793 ; X86-NO-BMI2-NO-SHLD-NEXT: addl $40, %esp
794 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
795 ; X86-NO-BMI2-NO-SHLD-NEXT: retl
822 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
823 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
824 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
825 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $40, %esp
826 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
827 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
828 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
829 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
830 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
831 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm1, %xmm1
832 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
833 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
834 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
835 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %dl
836 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $12, %dl
837 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
838 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx), %esi
839 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
840 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %edx
841 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
842 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
843 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
844 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%eax)
845 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $40, %esp
846 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
847 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
860 ; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
861 ; X64-NO-BMI2-NO-SHLD: # %bb.0:
862 ; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
863 ; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
864 ; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
865 ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
866 ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
867 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
868 ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi
869 ; X64-NO-BMI2-NO-SHLD-NEXT: leal (%rax,%rax), %r8d
870 ; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
871 ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8
872 ; X64-NO-BMI2-NO-SHLD-NEXT: orl %edi, %r8d
873 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
874 ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
875 ; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
876 ; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax
877 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%rdx)
878 ; X64-NO-BMI2-NO-SHLD-NEXT: retq
880 ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
881 ; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
882 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
883 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
884 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
885 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
886 ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
887 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
888 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax
889 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi
890 ; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
891 ; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
892 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%rdx)
893 ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
895 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
896 ; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
897 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
898 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
899 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
900 ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
901 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
902 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
903 ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
904 ; X64-HAVE-BMI2-NO-SHLD-NEXT: leal (%rcx,%rcx), %r8d
905 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
906 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
907 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
908 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
909 ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
910 ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx
911 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%rdx)
912 ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
929 ; X86-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
930 ; X86-NO-BMI2-NO-SHLD: # %bb.0:
931 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
932 ; X86-NO-BMI2-NO-SHLD-NEXT: subl $40, %esp
933 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
934 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
935 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
936 ; X86-NO-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
937 ; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx
938 ; X86-NO-BMI2-NO-SHLD-NEXT: xorps %xmm1, %xmm1
939 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
940 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
941 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
942 ; X86-NO-BMI2-NO-SHLD-NEXT: shrb $3, %dl
943 ; X86-NO-BMI2-NO-SHLD-NEXT: andb $12, %dl
944 ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
945 ; X86-NO-BMI2-NO-SHLD-NEXT: movl (%esp,%edx), %esi
946 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %edx
947 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
948 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
949 ; X86-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx
950 ; X86-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx
951 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
952 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx
953 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%eax)
954 ; X86-NO-BMI2-NO-SHLD-NEXT: addl $40, %esp
955 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
956 ; X86-NO-BMI2-NO-SHLD-NEXT: retl
983 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
984 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
985 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
986 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $40, %esp
987 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
988 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
989 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
990 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
991 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
992 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm1, %xmm1
993 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
994 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
995 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
996 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %dl
997 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $12, %dl
998 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
999 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx), %esi
1000 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1001 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %edx
1002 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
1003 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
1004 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1005 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax)
1006 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $40, %esp
1007 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
1008 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
1021 ; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
1022 ; X64-NO-BMI2-NO-SHLD: # %bb.0:
1023 ; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
1024 ; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
1025 ; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1026 ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
1027 ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
1028 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
1029 ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi
1030 ; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
1031 ; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
1032 ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8
1033 ; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %r8
1034 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
1035 ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
1036 ; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
1037 ; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax
1038 ; X64-NO-BMI2-NO-SHLD-NEXT: movq %rax, (%rdx)
1039 ; X64-NO-BMI2-NO-SHLD-NEXT: retq
1041 ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
1042 ; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
1043 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
1044 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
1045 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
1046 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
1047 ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1048 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
1049 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax
1050 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi
1051 ; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
1052 ; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
1053 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx)
1054 ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
1056 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
1057 ; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
1058 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
1059 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
1060 ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1061 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
1062 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
1063 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
1064 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
1065 ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
1066 ; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
1067 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
1068 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi
1069 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
1070 ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
1071 ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax
1072 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, (%rdx)
1073 ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
1090 ; X86-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
1091 ; X86-NO-BMI2-NO-SHLD: # %bb.0:
1092 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %ebp
1093 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
1094 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %edi
1095 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
1096 ; X86-NO-BMI2-NO-SHLD-NEXT: subl $44, %esp
1097 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1098 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1099 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1100 ; X86-NO-BMI2-NO-SHLD-NEXT: movups (%ecx), %xmm0
1101 ; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax
1102 ; X86-NO-BMI2-NO-SHLD-NEXT: xorps %xmm1, %xmm1
1103 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1104 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1105 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1106 ; X86-NO-BMI2-NO-SHLD-NEXT: shrb $3, %cl
1107 ; X86-NO-BMI2-NO-SHLD-NEXT: andb $12, %cl
1108 ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %cl, %ebx
1109 ; X86-NO-BMI2-NO-SHLD-NEXT: movl (%esp,%ebx), %ebp
1110 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%ebx), %esi
1111 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1112 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
1113 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
1114 ; X86-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
1115 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
1116 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi
1117 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1118 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
1119 ; X86-NO-BMI2-NO-SHLD-NEXT: andb $24, %al
1120 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %al
1121 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 8(%esp,%ebx), %ebx
1122 ; X86-NO-BMI2-NO-SHLD-NEXT: addl %ebx, %ebx
1123 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1124 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
1125 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx
1126 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%edx)
1127 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%edx)
1128 ; X86-NO-BMI2-NO-SHLD-NEXT: addl $44, %esp
1129 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
1130 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %edi
1131 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %ebx
1132 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %ebp
1133 ; X86-NO-BMI2-NO-SHLD-NEXT: retl
1168 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
1169 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
1170 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
1171 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
1172 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
1173 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
1174 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $44, %esp
1175 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1176 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1177 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1178 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1179 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1180 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm1, %xmm1
1181 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1182 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1183 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1184 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %dl
1185 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $12, %dl
1186 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1187 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx), %esi
1188 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
1189 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
1190 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %edi
1191 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%esp,%edx), %ebp
1192 ; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx
1193 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %edx
1194 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx
1195 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %esi
1196 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl
1197 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1198 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %ebp, %ebp
1199 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx
1200 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1201 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax)
1202 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax)
1203 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $44, %esp
1204 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
1205 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
1206 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
1207 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
1208 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
1220 ; no @load_16byte_chunk_of_16byte_alloca
1223 ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_32byte_alloca:
1224 ; X64-NO-BMI2: # %bb.0:
1225 ; X64-NO-BMI2-NEXT: movups (%rdi), %xmm0
1226 ; X64-NO-BMI2-NEXT: movups 16(%rdi), %xmm1
1227 ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
1228 ; X64-NO-BMI2-NEXT: xorps %xmm2, %xmm2
1229 ; X64-NO-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1230 ; X64-NO-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1231 ; X64-NO-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1232 ; X64-NO-BMI2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1233 ; X64-NO-BMI2-NEXT: movl %ecx, %eax
1234 ; X64-NO-BMI2-NEXT: shrb $6, %al
1235 ; X64-NO-BMI2-NEXT: movzbl %al, %eax
1236 ; X64-NO-BMI2-NEXT: movq -72(%rsp,%rax,8), %rax
1237 ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
1238 ; X64-NO-BMI2-NEXT: shrq %cl, %rax
1239 ; X64-NO-BMI2-NEXT: movb %al, (%rdx)
1240 ; X64-NO-BMI2-NEXT: retq
1259 ; X86-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca:
1260 ; X86-NO-BMI2-NO-SHLD: # %bb.0:
1261 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
1262 ; X86-NO-BMI2-NO-SHLD-NEXT: subl $72, %esp
1263 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1264 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1265 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1266 ; X86-NO-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1267 ; X86-NO-BMI2-NO-SHLD-NEXT: movups 16(%edx), %xmm1
1268 ; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1269 ; X86-NO-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1270 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1271 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1272 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1273 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1274 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1275 ; X86-NO-BMI2-NO-SHLD-NEXT: shrb $5, %dl
1276 ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1277 ; X86-NO-BMI2-NO-SHLD-NEXT: movl (%esp,%edx,4), %esi
1278 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %edx
1279 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
1280 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
1281 ; X86-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx
1282 ; X86-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx
1283 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
1284 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx
1285 ; X86-NO-BMI2-NO-SHLD-NEXT: movb %dl, (%eax)
1286 ; X86-NO-BMI2-NO-SHLD-NEXT: addl $72, %esp
1287 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
1288 ; X86-NO-BMI2-NO-SHLD-NEXT: retl
1317 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca:
1318 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
1319 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
1320 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $72, %esp
1321 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1322 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1323 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1324 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1325 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%edx), %xmm1
1326 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1327 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1328 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1329 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1330 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1331 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1332 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1333 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $5, %dl
1334 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1335 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx,4), %esi
1336 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1337 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %edx
1338 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
1339 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
1340 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1341 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%eax)
1342 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $72, %esp
1343 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
1344 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
1358 ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_32byte_alloca:
1359 ; X64-NO-BMI2: # %bb.0:
1360 ; X64-NO-BMI2-NEXT: movups (%rdi), %xmm0
1361 ; X64-NO-BMI2-NEXT: movups 16(%rdi), %xmm1
1362 ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
1363 ; X64-NO-BMI2-NEXT: xorps %xmm2, %xmm2
1364 ; X64-NO-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1365 ; X64-NO-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1366 ; X64-NO-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1367 ; X64-NO-BMI2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1368 ; X64-NO-BMI2-NEXT: movl %ecx, %eax
1369 ; X64-NO-BMI2-NEXT: shrb $6, %al
1370 ; X64-NO-BMI2-NEXT: movzbl %al, %eax
1371 ; X64-NO-BMI2-NEXT: movq -72(%rsp,%rax,8), %rsi
1372 ; X64-NO-BMI2-NEXT: shrq %cl, %rsi
1373 ; X64-NO-BMI2-NEXT: movl -64(%rsp,%rax,8), %eax
1374 ; X64-NO-BMI2-NEXT: addl %eax, %eax
1375 ; X64-NO-BMI2-NEXT: andb $56, %cl
1376 ; X64-NO-BMI2-NEXT: notb %cl
1377 ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
1378 ; X64-NO-BMI2-NEXT: shlq %cl, %rax
1379 ; X64-NO-BMI2-NEXT: orl %esi, %eax
1380 ; X64-NO-BMI2-NEXT: movw %ax, (%rdx)
1381 ; X64-NO-BMI2-NEXT: retq
1407 ; X86-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca:
1408 ; X86-NO-BMI2-NO-SHLD: # %bb.0:
1409 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
1410 ; X86-NO-BMI2-NO-SHLD-NEXT: subl $72, %esp
1411 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1412 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1413 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1414 ; X86-NO-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1415 ; X86-NO-BMI2-NO-SHLD-NEXT: movups 16(%edx), %xmm1
1416 ; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1417 ; X86-NO-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1418 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1419 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1420 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1421 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1422 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1423 ; X86-NO-BMI2-NO-SHLD-NEXT: shrb $5, %dl
1424 ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1425 ; X86-NO-BMI2-NO-SHLD-NEXT: movl (%esp,%edx,4), %esi
1426 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %edx
1427 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
1428 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
1429 ; X86-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx
1430 ; X86-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx
1431 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
1432 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx
1433 ; X86-NO-BMI2-NO-SHLD-NEXT: movw %dx, (%eax)
1434 ; X86-NO-BMI2-NO-SHLD-NEXT: addl $72, %esp
1435 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
1436 ; X86-NO-BMI2-NO-SHLD-NEXT: retl
1465 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca:
1466 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
1467 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
1468 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $72, %esp
1469 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1470 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1471 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1472 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1473 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%edx), %xmm1
1474 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1475 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1476 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1477 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1478 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1479 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1480 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1481 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $5, %dl
1482 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1483 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx,4), %esi
1484 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1485 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %edx
1486 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
1487 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
1488 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1489 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%eax)
1490 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $72, %esp
1491 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
1492 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
1505 ; X64-NO-BMI2-LABEL: load_4byte_chunk_of_32byte_alloca:
1506 ; X64-NO-BMI2: # %bb.0:
1507 ; X64-NO-BMI2-NEXT: movups (%rdi), %xmm0
1508 ; X64-NO-BMI2-NEXT: movups 16(%rdi), %xmm1
1509 ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
1510 ; X64-NO-BMI2-NEXT: xorps %xmm2, %xmm2
1511 ; X64-NO-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1512 ; X64-NO-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1513 ; X64-NO-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1514 ; X64-NO-BMI2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1515 ; X64-NO-BMI2-NEXT: movl %ecx, %eax
1516 ; X64-NO-BMI2-NEXT: shrb $6, %al
1517 ; X64-NO-BMI2-NEXT: movzbl %al, %eax
1518 ; X64-NO-BMI2-NEXT: movq -72(%rsp,%rax,8), %rsi
1519 ; X64-NO-BMI2-NEXT: shrq %cl, %rsi
1520 ; X64-NO-BMI2-NEXT: movl -64(%rsp,%rax,8), %eax
1521 ; X64-NO-BMI2-NEXT: addl %eax, %eax
1522 ; X64-NO-BMI2-NEXT: andb $56, %cl
1523 ; X64-NO-BMI2-NEXT: notb %cl
1524 ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
1525 ; X64-NO-BMI2-NEXT: shlq %cl, %rax
1526 ; X64-NO-BMI2-NEXT: orl %esi, %eax
1527 ; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
1528 ; X64-NO-BMI2-NEXT: retq
1554 ; X86-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca:
1555 ; X86-NO-BMI2-NO-SHLD: # %bb.0:
1556 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
1557 ; X86-NO-BMI2-NO-SHLD-NEXT: subl $72, %esp
1558 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1559 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1560 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1561 ; X86-NO-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1562 ; X86-NO-BMI2-NO-SHLD-NEXT: movups 16(%edx), %xmm1
1563 ; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1564 ; X86-NO-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1565 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1566 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1567 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1568 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1569 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1570 ; X86-NO-BMI2-NO-SHLD-NEXT: shrb $5, %dl
1571 ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1572 ; X86-NO-BMI2-NO-SHLD-NEXT: movl (%esp,%edx,4), %esi
1573 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %edx
1574 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
1575 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
1576 ; X86-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx
1577 ; X86-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx
1578 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
1579 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx
1580 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%eax)
1581 ; X86-NO-BMI2-NO-SHLD-NEXT: addl $72, %esp
1582 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
1583 ; X86-NO-BMI2-NO-SHLD-NEXT: retl
1612 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca:
1613 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
1614 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
1615 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $72, %esp
1616 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1617 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1618 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1619 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1620 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%edx), %xmm1
1621 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1622 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1623 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1624 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1625 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1626 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1627 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1628 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $5, %dl
1629 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1630 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx,4), %esi
1631 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1632 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %edx
1633 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
1634 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
1635 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1636 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax)
1637 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $72, %esp
1638 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
1639 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
1652 ; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca:
1653 ; X64-NO-BMI2-NO-SHLD: # %bb.0:
1654 ; X64-NO-BMI2-NO-SHLD-NEXT: movups (%rdi), %xmm0
1655 ; X64-NO-BMI2-NO-SHLD-NEXT: movups 16(%rdi), %xmm1
1656 ; X64-NO-BMI2-NO-SHLD-NEXT: leal (,%rsi,8), %ecx
1657 ; X64-NO-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1658 ; X64-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1659 ; X64-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1660 ; X64-NO-BMI2-NO-SHLD-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1661 ; X64-NO-BMI2-NO-SHLD-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1662 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax
1663 ; X64-NO-BMI2-NO-SHLD-NEXT: shrb $6, %al
1664 ; X64-NO-BMI2-NO-SHLD-NEXT: movzbl %al, %eax
1665 ; X64-NO-BMI2-NO-SHLD-NEXT: movq -72(%rsp,%rax,8), %rsi
1666 ; X64-NO-BMI2-NO-SHLD-NEXT: movq -64(%rsp,%rax,8), %rax
1667 ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rsi
1668 ; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
1669 ; X64-NO-BMI2-NO-SHLD-NEXT: addq %rax, %rax
1670 ; X64-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx
1671 ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax
1672 ; X64-NO-BMI2-NO-SHLD-NEXT: orq %rsi, %rax
1673 ; X64-NO-BMI2-NO-SHLD-NEXT: movq %rax, (%rdx)
1674 ; X64-NO-BMI2-NO-SHLD-NEXT: retq
1696 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca:
1697 ; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
1698 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movups (%rdi), %xmm0
1699 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%rdi), %xmm1
1700 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
1701 ; X64-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1702 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1703 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1704 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1705 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1706 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax
1707 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrb $6, %al
1708 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl %al, %eax
1709 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, -72(%rsp,%rax,8), %rcx
1710 ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %sil
1711 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -64(%rsp,%rax,8), %rax
1712 ; X64-HAVE-BMI2-NO-SHLD-NEXT: addq %rax, %rax
1713 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rsi, %rax, %rax
1714 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rax
1715 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, (%rdx)
1716 ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
1718 ; X86-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca:
1719 ; X86-NO-BMI2-NO-SHLD: # %bb.0:
1720 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %ebp
1721 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
1722 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %edi
1723 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
1724 ; X86-NO-BMI2-NO-SHLD-NEXT: subl $76, %esp
1725 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1726 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1727 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1728 ; X86-NO-BMI2-NO-SHLD-NEXT: movups (%ecx), %xmm0
1729 ; X86-NO-BMI2-NO-SHLD-NEXT: movups 16(%ecx), %xmm1
1730 ; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax
1731 ; X86-NO-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1732 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1733 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1734 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1735 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1736 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1737 ; X86-NO-BMI2-NO-SHLD-NEXT: shrb $5, %cl
1738 ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %cl, %ebx
1739 ; X86-NO-BMI2-NO-SHLD-NEXT: movl (%esp,%ebx,4), %ebp
1740 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%ebx,4), %esi
1741 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1742 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
1743 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
1744 ; X86-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
1745 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
1746 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi
1747 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1748 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
1749 ; X86-NO-BMI2-NO-SHLD-NEXT: andb $24, %al
1750 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %al
1751 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 8(%esp,%ebx,4), %ebx
1752 ; X86-NO-BMI2-NO-SHLD-NEXT: addl %ebx, %ebx
1753 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1754 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
1755 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx
1756 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%edx)
1757 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%edx)
1758 ; X86-NO-BMI2-NO-SHLD-NEXT: addl $76, %esp
1759 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
1760 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %edi
1761 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %ebx
1762 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %ebp
1763 ; X86-NO-BMI2-NO-SHLD-NEXT: retl
1800 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca:
1801 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
1802 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
1803 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
1804 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
1805 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
1806 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $76, %esp
1807 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1808 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1809 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
1810 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%edx), %xmm0
1811 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%edx), %xmm1
1812 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
1813 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1814 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1815 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1816 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1817 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
1818 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
1819 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $5, %dl
1820 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1821 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx,4), %esi
1822 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
1823 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
1824 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %edi
1825 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%esp,%edx,4), %ebp
1826 ; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx
1827 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %edx
1828 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx
1829 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %esi
1830 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl
1831 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1832 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %ebp, %ebp
1833 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx
1834 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1835 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax)
1836 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax)
1837 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $76, %esp
1838 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
1839 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
1840 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
1841 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
1842 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
1855 ; X64-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca:
1856 ; X64-NO-BMI2-NO-SHLD: # %bb.0:
1857 ; X64-NO-BMI2-NO-SHLD-NEXT: movups (%rdi), %xmm0
1858 ; X64-NO-BMI2-NO-SHLD-NEXT: movups 16(%rdi), %xmm1
1859 ; X64-NO-BMI2-NO-SHLD-NEXT: leal (,%rsi,8), %eax
1860 ; X64-NO-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1861 ; X64-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1862 ; X64-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1863 ; X64-NO-BMI2-NO-SHLD-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1864 ; X64-NO-BMI2-NO-SHLD-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1865 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1866 ; X64-NO-BMI2-NO-SHLD-NEXT: shrb $6, %cl
1867 ; X64-NO-BMI2-NO-SHLD-NEXT: movzbl %cl, %edi
1868 ; X64-NO-BMI2-NO-SHLD-NEXT: movq -72(%rsp,%rdi,8), %r8
1869 ; X64-NO-BMI2-NO-SHLD-NEXT: movq -64(%rsp,%rdi,8), %r9
1870 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1871 ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8
1872 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi
1873 ; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil
1874 ; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r10
1875 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
1876 ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r10
1877 ; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r10
1878 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1879 ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9
1880 ; X64-NO-BMI2-NO-SHLD-NEXT: movq -56(%rsp,%rdi,8), %rax
1881 ; X64-NO-BMI2-NO-SHLD-NEXT: addq %rax, %rax
1882 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
1883 ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax
1884 ; X64-NO-BMI2-NO-SHLD-NEXT: orq %r9, %rax
1885 ; X64-NO-BMI2-NO-SHLD-NEXT: movq %rax, 8(%rdx)
1886 ; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, (%rdx)
1887 ; X64-NO-BMI2-NO-SHLD-NEXT: retq
1889 ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca:
1890 ; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
1891 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movups (%rdi), %xmm0
1892 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movups 16(%rdi), %xmm1
1893 ; X64-NO-BMI2-HAVE-SHLD-NEXT: leal (,%rsi,8), %eax
1894 ; X64-NO-BMI2-HAVE-SHLD-NEXT: xorps %xmm2, %xmm2
1895 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1896 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1897 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1898 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1899 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
1900 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrb $6, %cl
1901 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl %cl, %esi
1902 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -72(%rsp,%rsi,8), %rdi
1903 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -64(%rsp,%rsi,8), %r8
1904 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r9
1905 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
1906 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r9
1907 ; X64-NO-BMI2-HAVE-SHLD-NEXT: notb %cl
1908 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -56(%rsp,%rsi,8), %rsi
1909 ; X64-NO-BMI2-HAVE-SHLD-NEXT: addq %rsi, %rsi
1910 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %rsi
1911 ; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r9, %rsi
1912 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
1913 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi
1914 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx)
1915 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, 8(%rdx)
1916 ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
1918 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca:
1919 ; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
1920 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movups (%rdi), %xmm0
1921 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%rdi), %xmm1
1922 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
1923 ; X64-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1924 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1925 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
1926 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
1927 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1928 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax
1929 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrb $6, %al
1930 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl %al, %eax
1931 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, -72(%rsp,%rax,8), %rcx
1932 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -64(%rsp,%rax,8), %rdi
1933 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rdi, %r8
1934 ; X64-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi
1935 ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %sil
1936 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -56(%rsp,%rax,8), %rax
1937 ; X64-HAVE-BMI2-NO-SHLD-NEXT: addq %rdi, %rdi
1938 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rsi, %rdi, %rdi
1939 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi
1940 ; X64-HAVE-BMI2-NO-SHLD-NEXT: addq %rax, %rax
1941 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rsi, %rax, %rax
1942 ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %rax
1943 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, 8(%rdx)
1944 ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx)
1945 ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
1975 ; X86-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca:
1976 ; X86-NO-BMI2-NO-SHLD: # %bb.0:
1977 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %ebp
1978 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
1979 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %edi
1980 ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
1981 ; X86-NO-BMI2-NO-SHLD-NEXT: subl $92, %esp
1982 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
1983 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
1984 ; X86-NO-BMI2-NO-SHLD-NEXT: movups (%ecx), %xmm0
1985 ; X86-NO-BMI2-NO-SHLD-NEXT: movups 16(%ecx), %xmm1
1986 ; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax
1987 ; X86-NO-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
1988 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1989 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
1990 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1991 ; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
1992 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1993 ; X86-NO-BMI2-NO-SHLD-NEXT: shrb $5, %cl
1994 ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %cl, %edi
1995 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 16(%esp,%edi,4), %edx
1996 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 20(%esp,%edi,4), %ebp
1997 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
1998 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
1999 ; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
2000 ; X86-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2001 ; X86-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %esi
2002 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
2003 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi
2004 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2005 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
2006 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
2007 ; X86-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch
2008 ; X86-NO-BMI2-NO-SHLD-NEXT: andb $24, %ch
2009 ; X86-NO-BMI2-NO-SHLD-NEXT: xorb $31, %ch
2010 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 24(%esp,%edi,4), %edx
2011 ; X86-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebx
2012 ; X86-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
2013 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
2014 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx
2015 ; X86-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl
2016 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
2017 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 28(%esp,%edi,4), %esi
2018 ; X86-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp
2019 ; X86-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
2020 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp
2021 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebp
2022 ; X86-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl
2023 ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
2024 ; X86-NO-BMI2-NO-SHLD-NEXT: movl 32(%esp,%edi,4), %eax
2025 ; X86-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax
2026 ; X86-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
2027 ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
2028 ; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax
2029 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
2030 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, 12(%ecx)
2031 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%ecx)
2032 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%ecx)
2033 ; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2034 ; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx)
2035 ; X86-NO-BMI2-NO-SHLD-NEXT: addl $92, %esp
2036 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
2037 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %edi
2038 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %ebx
2039 ; X86-NO-BMI2-NO-SHLD-NEXT: popl %ebp
2040 ; X86-NO-BMI2-NO-SHLD-NEXT: retl
2086 ; X86-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca:
2087 ; X86-HAVE-BMI2-NO-SHLD: # %bb.0:
2088 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
2089 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
2090 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
2091 ; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
2092 ; X86-HAVE-BMI2-NO-SHLD-NEXT: subl $92, %esp
2093 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
2094 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
2095 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups (%ecx), %xmm0
2096 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movups 16(%ecx), %xmm1
2097 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax
2098 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorps %xmm2, %xmm2
2099 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
2100 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
2101 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
2102 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
2103 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
2104 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $5, %cl
2105 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %cl, %ecx
2106 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, 16(%esp,%ecx,4), %edi
2107 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%esp,%ecx,4), %esi
2108 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %edx
2109 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2110 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
2111 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx
2112 ; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
2113 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi
2114 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %ebp
2115 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp
2116 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%esp,%ecx,4), %esi
2117 ; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl
2118 ; X86-HAVE-BMI2-NO-SHLD-NEXT: xorb $31, %bl
2119 ; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
2120 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi
2121 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
2122 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %esi
2123 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2124 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%esp,%ecx,4), %esi
2125 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %eax
2126 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi
2127 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx
2128 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
2129 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 32(%esp,%ecx,4), %ecx
2130 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %ecx, %ecx
2131 ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %ecx
2132 ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx
2133 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
2134 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax)
2135 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%eax)
2136 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax)
2137 ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax)
2138 ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $92, %esp
2139 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
2140 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
2141 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
2142 ; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
2143 ; X86-HAVE-BMI2-NO-SHLD-NEXT: retl
2155 ; no @load_32byte_chunk_of_32byte_alloca
2159 ; X64-NO-SHLD: {{.*}}
2161 ; X86-NO-SHLD: {{.*}}