xref: /llvm-project/llvm/test/CodeGen/X86/funnel-shift.ll (revision 14304055e0d223a6dd224625b8fd128e6f711eb5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686--   -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,X86-SSE2
3; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,X64-AVX,X64-AVX2
4; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64-AVX,X64-VBMI2
5
6declare i8 @llvm.fshl.i8(i8, i8, i8)
7declare i16 @llvm.fshl.i16(i16, i16, i16)
8declare i32 @llvm.fshl.i32(i32, i32, i32)
9declare i64 @llvm.fshl.i64(i64, i64, i64)
10declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
11declare i128 @llvm.fshl.i128(i128, i128, i128)
12
13declare i8 @llvm.fshr.i8(i8, i8, i8)
14declare i16 @llvm.fshr.i16(i16, i16, i16)
15declare i32 @llvm.fshr.i32(i32, i32, i32)
16declare i64 @llvm.fshr.i64(i64, i64, i64)
17declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
18
19; General case - all operands can be variables
20
; i32 fits a GPR, so variable-count fshl is a single shld (count moved into cl).
define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-SSE2-LABEL: fshl_i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %esi, %eax
; X64-AVX-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; On i686 the i64 funnel shift is built from two 32-bit shld's; the
; testb $32 + cmov sequence selects which 32-bit halves feed each shld.
define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-SSE2-LABEL: fshl_i64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    movl %edx, %edi
; X86-SSE2-NEXT:    cmovnel %esi, %edi
; X86-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    cmovnel {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl %edi, %eax
; X86-SSE2-NEXT:    shldl %cl, %esi, %eax
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %edi, %edx
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movq %rdx, %rcx
; X64-AVX-NEXT:    movq %rdi, %rax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX-NEXT:    shldq %cl, %rsi, %rax
; X64-AVX-NEXT:    retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Same expansion one level up: bits 5 and 6 of the count (testb $32 /
; testb $64) select the 32-bit pieces; on i686 the i128 result is returned
; through a hidden sret pointer (note the stores and retl $4).
define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SSE2-LABEL: fshl_i128:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    testb $64, %cl
; X86-SSE2-NEXT:    movl %esi, %eax
; X86-SSE2-NEXT:    cmovnel %ebx, %eax
; X86-SSE2-NEXT:    movl %edx, %ebp
; X86-SSE2-NEXT:    cmovnel %edi, %ebp
; X86-SSE2-NEXT:    cmovnel {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    cmovnel {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    cmovnel %esi, %edx
; X86-SSE2-NEXT:    cmovnel %ebp, %esi
; X86-SSE2-NEXT:    cmovnel %eax, %ebp
; X86-SSE2-NEXT:    cmovel %edi, %ebx
; X86-SSE2-NEXT:    cmovel %eax, %edi
; X86-SSE2-NEXT:    movl %edi, %eax
; X86-SSE2-NEXT:    shldl %cl, %ebx, %eax
; X86-SSE2-NEXT:    movl %ebp, %ebx
; X86-SSE2-NEXT:    shldl %cl, %edi, %ebx
; X86-SSE2-NEXT:    movl %esi, %edi
; X86-SSE2-NEXT:    shldl %cl, %ebp, %edi
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %esi, %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl %edx, 12(%ecx)
; X86-SSE2-NEXT:    movl %edi, 8(%ecx)
; X86-SSE2-NEXT:    movl %ebx, 4(%ecx)
; X86-SSE2-NEXT:    movl %eax, (%ecx)
; X86-SSE2-NEXT:    movl %ecx, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl $4
;
; X64-AVX-LABEL: fshl_i128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    testb $64, %r8b
; X64-AVX-NEXT:    cmovneq %rdi, %rsi
; X64-AVX-NEXT:    cmoveq %rcx, %rdx
; X64-AVX-NEXT:    cmovneq %rcx, %rdi
; X64-AVX-NEXT:    movq %rdi, %rax
; X64-AVX-NEXT:    movl %r8d, %ecx
; X64-AVX-NEXT:    shldq %cl, %rdx, %rax
; X64-AVX-NEXT:    shldq %cl, %rdi, %rsi
; X64-AVX-NEXT:    movq %rsi, %rdx
; X64-AVX-NEXT:    retq
  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
  ret i128 %f
}
135
; Verify that weird types are minimally supported.
; i37 is not a power of 2, so the shift amount needs a real modulo-37
; reduction: i686 libcalls __umoddi3, while x64 uses a multiply-by-magic
; remainder sequence. The operands are pre-shifted (shldl/shlq $27) to
; align the 37 payload bits within the promoted 64-bit registers.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X86-SSE2-LABEL: fshl_i37:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    andl $31, %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    shldl $27, %ebx, %edi
; X86-SSE2-NEXT:    pushl $0
; X86-SSE2-NEXT:    pushl $37
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    calll __umoddi3
; X86-SSE2-NEXT:    addl $16, %esp
; X86-SSE2-NEXT:    movl %eax, %ecx
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    jne .LBB3_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    movl %edi, %ebx
; X86-SSE2-NEXT:    movl %esi, %edi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    jmp .LBB3_3
; X86-SSE2-NEXT:  .LBB3_1:
; X86-SSE2-NEXT:    shll $27, %ebx
; X86-SSE2-NEXT:  .LBB3_3:
; X86-SSE2-NEXT:    movl %edi, %eax
; X86-SSE2-NEXT:    shldl %cl, %ebx, %eax
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %edi, %esi
; X86-SSE2-NEXT:    movl %esi, %edx
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i37:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movq %rdx, %rcx
; X64-AVX-NEXT:    movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
; X64-AVX-NEXT:    andq %rdx, %rax
; X64-AVX-NEXT:    movabsq $498560650640798693, %rdx # imm = 0x6EB3E45306EB3E5
; X64-AVX-NEXT:    mulq %rdx
; X64-AVX-NEXT:    leal (%rdx,%rdx,8), %eax
; X64-AVX-NEXT:    leal (%rdx,%rax,4), %eax
; X64-AVX-NEXT:    subl %eax, %ecx
; X64-AVX-NEXT:    shlq $27, %rsi
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX-NEXT:    shldq %cl, %rsi, %rdi
; X64-AVX-NEXT:    movq %rdi, %rax
; X64-AVX-NEXT:    retq
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}
195
; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
; i.e. fshl(i7 112, i7 127, i7 2) == 0b1000011 == 67, fully constant-folded.

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $67, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is 'shld' with constant operand.

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshl_i32_const_shift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_const_shift:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shldl $9, %esi, %eax
; X64-AVX-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount.
; 41 mod 32 == 9, so codegen matches fshl_i32_const_shift above.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshl_i32_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_const_overshift:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shldl $9, %esi, %eax
; X64-AVX-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.
; 105 mod 64 == 41; the i686 expansion becomes a shld/shrd pair on halves.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X86-SSE2-LABEL: fshl_i64_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shldl $9, %ecx, %edx
; X86-SSE2-NEXT:    shrdl $23, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i64_const_overshift:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movq %rdi, %rax
; X64-AVX-NEXT:    shldq $41, %rsi, %rax
; X64-AVX-NEXT:    retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
; fshl(i8 255, i8 0, 7) == (255 << 7) | (0 >> 1) == 0x80 == -128.

define i8 @fshl_i8_const_fold() nounwind {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $-128, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}
277
; Repeat everything for funnel shift right.

; General case - all operands can be variables

; Mirror of fshl_i32: a single variable-count shrd.
define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-SSE2-LABEL: fshr_i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}
301
; Verify that weird types are minimally supported.
; As in fshl_i37, the amount is reduced modulo 37 (__umoddi3 on i686,
; multiply-by-magic on x64); the extra 'addl $27, %ecx' rebiases the
; modulo-37 count into the 64-bit shrd domain, matching the operands that
; were pre-shifted left by 27 (shldl/shlq $27).
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X86-SSE2-LABEL: fshr_i37:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    andl $31, %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    shldl $27, %ebx, %esi
; X86-SSE2-NEXT:    pushl $0
; X86-SSE2-NEXT:    pushl $37
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    calll __umoddi3
; X86-SSE2-NEXT:    addl $16, %esp
; X86-SSE2-NEXT:    movl %eax, %ecx
; X86-SSE2-NEXT:    addl $27, %ecx
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    je .LBB10_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    movl %edi, %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    jmp .LBB10_3
; X86-SSE2-NEXT:  .LBB10_1:
; X86-SSE2-NEXT:    shll $27, %ebx
; X86-SSE2-NEXT:    movl %esi, %edx
; X86-SSE2-NEXT:    movl %ebx, %esi
; X86-SSE2-NEXT:  .LBB10_3:
; X86-SSE2-NEXT:    shrdl %cl, %edx, %esi
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shrdl %cl, %edi, %edx
; X86-SSE2-NEXT:    movl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i37:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movq %rdx, %rcx
; X64-AVX-NEXT:    movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
; X64-AVX-NEXT:    andq %rdx, %rax
; X64-AVX-NEXT:    movabsq $498560650640798693, %rdx # imm = 0x6EB3E45306EB3E5
; X64-AVX-NEXT:    mulq %rdx
; X64-AVX-NEXT:    leal (%rdx,%rdx,8), %eax
; X64-AVX-NEXT:    leal (%rdx,%rax,4), %eax
; X64-AVX-NEXT:    subl %eax, %ecx
; X64-AVX-NEXT:    addl $27, %ecx
; X64-AVX-NEXT:    shlq $27, %rsi
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX-NEXT:    shrdq %cl, %rdi, %rsi
; X64-AVX-NEXT:    movq %rsi, %rax
; X64-AVX-NEXT:    retq
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}
363
; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111
; i.e. fshr(i7 112, i7 127, i7 2) == 0b0011111 == 31, fully constant-folded.

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() nounwind {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $31, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}
375
; demanded bits tests
; The 'or' with 0x80000000 / 1 must be simplified away by demanded-bits
; analysis: only the funnel shift survives in the output. Note that for
; 32-bit operands shldl $23 is equivalent to shrdl $9 (with operands
; swapped), which is why the x64 fshr case prints as shldl.

define i32 @fshl_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_demandedbits:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_demandedbits:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shldl $9, %esi, %eax
; X64-AVX-NEXT:    retq
  %x = or i32 %a0, 2147483648
  %y = or i32 %a1, 1
  %res = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %res
}

define i32 @fshr_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_demandedbits:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_demandedbits:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shldl $23, %esi, %eax
; X64-AVX-NEXT:    retq
  %x = or i32 %a0, 2147483648
  %y = or i32 %a1, 1
  %res = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %res
}
415
; undef handling

; fshl with an undef high operand: the undef half may materialize as an
; uninitialized register (on x64 below, %eax is never written before shldl).
define i32 @fshl_i32_undef0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef0:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %a1)
  ret i32 %res
}

; Same with the amount pre-masked to 0..7 (so it is known non-wrapping).
define i32 @fshl_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andl $7, %ecx
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef0_msk:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    andl $7, %ecx
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %m)
  ret i32 %res
}

; Constant amount: fshl(undef, a0, 9) folds to a plain a0 >> (32-9).
define i32 @fshl_i32_undef0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef0_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shrl $23, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9)
  ret i32 %res
}

; Vector version of the fold above: per-lane lshr by 32-n (23,22,21,20).
define <4 x i32> @fshl_v4i32_undef0_cst(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: fshl_v4i32_undef0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $20, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psrld $21, %xmm2
; X86-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $22, %xmm1
; X86-SSE2-NEXT:    psrld $23, %xmm0
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_v4i32_undef0_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
;
; X64-VBMI2-LABEL: fshl_v4i32_undef0_cst:
; X64-VBMI2:       # %bb.0:
; X64-VBMI2-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-VBMI2-NEXT:    retq
  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> undef, <4 x i32> %a0, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
  ret <4 x i32> %res
}
501
; fshl with an undef low operand: only the shld of a0 survives.
define i32 @fshl_i32_undef1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %eax, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %a1)
  ret i32 %res
}

; With a masked (non-wrapping) amount this folds all the way to a plain shll.
define i32 @fshl_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andb $7, %cl
; X86-SSE2-NEXT:    shll %cl, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef1_msk:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    andb $7, %cl
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shll %cl, %eax
; X64-AVX-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %m)
  ret i32 %res
}

; Constant amount: fshl(a0, undef, 9) folds to a0 << 9.
define i32 @fshl_i32_undef1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef1_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shll $9, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9)
  ret i32 %res
}

; Vector version: per-lane shl by 9,10,11,12 (SSE2 lowers via pmuludq by 1<<n).
define <4 x i32> @fshl_v4i32_undef1_cst(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: fshl_v4i32_undef1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_v4i32_undef1_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
  ret <4 x i32> %res
}

; Undef shift amount: %cl feeds shldl without ever being initialized.
define i32 @fshl_i32_undef2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef2:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shldl %cl, %esi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 undef)
  ret i32 %res
}
594
; fshr with an undef high operand: only the shrd of a0 survives.
define i32 @fshr_i32_undef0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef0:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %eax, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %a1)
  ret i32 %res
}

; With a masked (non-wrapping) amount this folds to a plain shrl.
define i32 @fshr_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andb $7, %cl
; X86-SSE2-NEXT:    shrl %cl, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef0_msk:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    andb $7, %cl
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrl %cl, %eax
; X64-AVX-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %m)
  ret i32 %res
}

; Constant amount: fshr(undef, a0, 9) folds to a0 >> 9.
define i32 @fshr_i32_undef0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef0_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shrl $9, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9)
  ret i32 %res
}

; Vector version: per-lane lshr by 9,10,11,12.
define <4 x i32> @fshr_v4i32_undef0_cst(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: fshr_v4i32_undef0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $12, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psrld $11, %xmm2
; X86-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $10, %xmm1
; X86-SSE2-NEXT:    psrld $9, %xmm0
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_v4i32_undef0_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> undef, <4 x i32> %a0, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
  ret <4 x i32> %res
}
674
; fshr with an undef low operand.
define i32 @fshr_i32_undef1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %a1)
  ret i32 %res
}

; Same with the amount pre-masked to 0..7.
define i32 @fshr_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andl $7, %ecx
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef1_msk:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    andl $7, %ecx
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %m)
  ret i32 %res
}

; Constant amount: fshr(a0, undef, 9) folds to a0 << (32-9).
define i32 @fshr_i32_undef1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef1_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shll $23, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9)
  ret i32 %res
}

; Vector version: folds to per-lane shl (SSE2 via pmuludq, AVX2 via vpsllvd).
define <4 x i32> @fshr_v4i32_undef1_cst(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: fshr_v4i32_undef1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_v4i32_undef1_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
;
; X64-VBMI2-LABEL: fshr_v4i32_undef1_cst:
; X64-VBMI2:       # %bb.0:
; X64-VBMI2-NEXT:    vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-VBMI2-NEXT:    retq
  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
  ret <4 x i32> %res
}

; Undef shift amount: %cl feeds shrdl without ever being initialized.
define i32 @fshr_i32_undef2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef2:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 undef)
  ret i32 %res
}
771
; shift zero args
; With a variable amount a zero operand still needs the funnel shift (the
; zero is materialized with xor); constant-amount cases fold to one shift.

define i32 @fshl_i32_zero0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    xorl %eax, %eax
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_zero0:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    xorl %eax, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 %a1)
  ret i32 %res
}

; fshl(0, a0, 9) folds to a0 >> (32-9).
define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_zero0_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shrl $23, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 9)
  ret i32 %res
}

define i32 @fshl_i32_zero1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    xorl %edx, %edx
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_zero1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    xorl %edx, %edx
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %edx, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 %a1)
  ret i32 %res
}

; fshl(a0, 0, 9) folds to a0 << 9.
define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_zero1_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shll $9, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 9)
  ret i32 %res
}
846
847define i32 @fshr_i32_zero0(i32 %a0, i32 %a1) nounwind {
848; X86-SSE2-LABEL: fshr_i32_zero0:
849; X86-SSE2:       # %bb.0:
850; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
851; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
852; X86-SSE2-NEXT:    xorl %edx, %edx
853; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
854; X86-SSE2-NEXT:    retl
855;
856; X64-AVX-LABEL: fshr_i32_zero0:
857; X64-AVX:       # %bb.0:
858; X64-AVX-NEXT:    movl %esi, %ecx
859; X64-AVX-NEXT:    movl %edi, %eax
860; X64-AVX-NEXT:    xorl %edx, %edx
861; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
862; X64-AVX-NEXT:    shrdl %cl, %edx, %eax
863; X64-AVX-NEXT:    retq
864  %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 %a1)
865  ret i32 %res
866}
867
; fshr with a zero 'hi' operand and a constant amount: fshr(0, a0, 9) folds to a plain shrl $9.
868define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
869; X86-SSE2-LABEL: fshr_i32_zero0_cst:
870; X86-SSE2:       # %bb.0:
871; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
872; X86-SSE2-NEXT:    shrl $9, %eax
873; X86-SSE2-NEXT:    retl
874;
875; X64-AVX-LABEL: fshr_i32_zero0_cst:
876; X64-AVX:       # %bb.0:
877; X64-AVX-NEXT:    movl %edi, %eax
878; X64-AVX-NEXT:    shrl $9, %eax
879; X64-AVX-NEXT:    retq
880  %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 9)
881  ret i32 %res
882}
883
; fshr with a zero 'lo' operand and a variable amount: shrd into a zeroed destination register.
884define i32 @fshr_i32_zero1(i32 %a0, i32 %a1) nounwind {
885; X86-SSE2-LABEL: fshr_i32_zero1:
886; X86-SSE2:       # %bb.0:
887; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
888; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
889; X86-SSE2-NEXT:    xorl %eax, %eax
890; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
891; X86-SSE2-NEXT:    retl
892;
893; X64-AVX-LABEL: fshr_i32_zero1:
894; X64-AVX:       # %bb.0:
895; X64-AVX-NEXT:    movl %esi, %ecx
896; X64-AVX-NEXT:    xorl %eax, %eax
897; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
898; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
899; X64-AVX-NEXT:    retq
900  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 %a1)
901  ret i32 %res
902}
903
; fshr with a zero 'lo' operand and a constant amount: fshr(a0, 0, 9) folds to shll $23 (32-9).
904define i32 @fshr_i32_zero1_cst(i32 %a0) nounwind {
905; X86-SSE2-LABEL: fshr_i32_zero1_cst:
906; X86-SSE2:       # %bb.0:
907; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
908; X86-SSE2-NEXT:    shll $23, %eax
909; X86-SSE2-NEXT:    retl
910;
911; X64-AVX-LABEL: fshr_i32_zero1_cst:
912; X64-AVX:       # %bb.0:
913; X64-AVX-NEXT:    movl %edi, %eax
914; X64-AVX-NEXT:    shll $23, %eax
915; X64-AVX-NEXT:    retq
916  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 9)
917  ret i32 %res
918}
919
920; shift by zero
921
; fshl by a zero amount is a no-op: returns the first operand unchanged.
922define i32 @fshl_i32_zero2(i32 %a0, i32 %a1) nounwind {
923; X86-SSE2-LABEL: fshl_i32_zero2:
924; X86-SSE2:       # %bb.0:
925; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
926; X86-SSE2-NEXT:    retl
927;
928; X64-AVX-LABEL: fshl_i32_zero2:
929; X64-AVX:       # %bb.0:
930; X64-AVX-NEXT:    movl %edi, %eax
931; X64-AVX-NEXT:    retq
932  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 0)
933  ret i32 %res
934}
935
; fshr by a zero amount is a no-op: returns the second operand unchanged.
936define i32 @fshr_i32_zero2(i32 %a0, i32 %a1) nounwind {
937; X86-SSE2-LABEL: fshr_i32_zero2:
938; X86-SSE2:       # %bb.0:
939; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
940; X86-SSE2-NEXT:    retl
941;
942; X64-AVX-LABEL: fshr_i32_zero2:
943; X64-AVX:       # %bb.0:
944; X64-AVX-NEXT:    movl %esi, %eax
945; X64-AVX-NEXT:    retq
946  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 0)
947  ret i32 %res
948}
949
950; With constant shift amount, this is 'shrd' or 'shld'.
951
; Constant-amount fshr: x86 emits shrdl $9; x64 emits the equivalent shldl $23 (32-9).
952define i32 @fshr_i32_const_shift(i32 %x, i32 %y) nounwind {
953; X86-SSE2-LABEL: fshr_i32_const_shift:
954; X86-SSE2:       # %bb.0:
955; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
956; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
957; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
958; X86-SSE2-NEXT:    retl
959;
960; X64-AVX-LABEL: fshr_i32_const_shift:
961; X64-AVX:       # %bb.0:
962; X64-AVX-NEXT:    movl %edi, %eax
963; X64-AVX-NEXT:    shldl $23, %esi, %eax
964; X64-AVX-NEXT:    retq
965  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
966  ret i32 %f
967}
968
969; Check modulo math on the shift amount: 41 mod 32 = 9, but a right-shift may become a left-shift, so 32-9=23.
970
; Over-wide constant amount 41 is reduced mod 32 to 9, giving the same codegen as fshr_i32_const_shift.
971define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind {
972; X86-SSE2-LABEL: fshr_i32_const_overshift:
973; X86-SSE2:       # %bb.0:
974; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
975; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
976; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
977; X86-SSE2-NEXT:    retl
978;
979; X64-AVX-LABEL: fshr_i32_const_overshift:
980; X64-AVX:       # %bb.0:
981; X64-AVX-NEXT:    movl %edi, %eax
982; X64-AVX-NEXT:    shldl $23, %esi, %eax
983; X64-AVX-NEXT:    retq
984  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
985  ret i32 %f
986}
987
988; 64-bit should also work: 105 mod 64 = 41, but the right-shift becomes a left-shift, so 64-41=23.
989
; 64-bit over-wide amount: 105 mod 64 = 41; x64 uses shldq $23 (64-41), x86 splits into a shrdl/shldl pair.
990define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind {
991; X86-SSE2-LABEL: fshr_i64_const_overshift:
992; X86-SSE2:       # %bb.0:
993; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
994; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
995; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
996; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
997; X86-SSE2-NEXT:    shldl $23, %ecx, %edx
998; X86-SSE2-NEXT:    retl
999;
1000; X64-AVX-LABEL: fshr_i64_const_overshift:
1001; X64-AVX:       # %bb.0:
1002; X64-AVX-NEXT:    movq %rdi, %rax
1003; X64-AVX-NEXT:    shldq $23, %rsi, %rax
1004; X64-AVX-NEXT:    retq
1005  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
1006  ret i64 %f
1007}
1008
1009; This should work without any node-specific logic.
1010
; All-constant fshr is folded at compile time: fshr(255, 0, 7) == 0xFE, emitted as movb $-2.
1011define i8 @fshr_i8_const_fold() nounwind {
1012; CHECK-LABEL: fshr_i8_const_fold:
1013; CHECK:       # %bb.0:
1014; CHECK-NEXT:    movb $-2, %al
1015; CHECK-NEXT:    ret{{[l|q]}}
1016  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
1017  ret i8 %f
1018}
1019
; Shift amount equal to the bit width (32 mod 32 == 0): fshl returns the first operand, no shift emitted.
1020define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
1021; X86-SSE2-LABEL: fshl_i32_shift_by_bitwidth:
1022; X86-SSE2:       # %bb.0:
1023; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1024; X86-SSE2-NEXT:    retl
1025;
1026; X64-AVX-LABEL: fshl_i32_shift_by_bitwidth:
1027; X64-AVX:       # %bb.0:
1028; X64-AVX-NEXT:    movl %edi, %eax
1029; X64-AVX-NEXT:    retq
1030  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
1031  ret i32 %f
1032}
1033
; Shift amount equal to the bit width (32 mod 32 == 0): fshr returns the second operand, no shift emitted.
1034define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
1035; X86-SSE2-LABEL: fshr_i32_shift_by_bitwidth:
1036; X86-SSE2:       # %bb.0:
1037; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1038; X86-SSE2-NEXT:    retl
1039;
1040; X64-AVX-LABEL: fshr_i32_shift_by_bitwidth:
1041; X64-AVX:       # %bb.0:
1042; X64-AVX-NEXT:    movl %esi, %eax
1043; X64-AVX-NEXT:    retq
1044  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
1045  ret i32 %f
1046}
1047
; Vector splat amount equal to the bit width: fshl returns %x, which is already in xmm0, so only a ret is emitted.
1048define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
1049; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
1050; CHECK:       # %bb.0:
1051; CHECK-NEXT:    ret{{[l|q]}}
1052  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
1053  ret <4 x i32> %f
1054}
1055
; Vector splat amount equal to the bit width: fshr returns %y, so only a register move (xmm1 -> xmm0) is emitted.
1056define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
1057; X86-SSE2-LABEL: fshr_v4i32_shift_by_bitwidth:
1058; X86-SSE2:       # %bb.0:
1059; X86-SSE2-NEXT:    movaps %xmm1, %xmm0
1060; X86-SSE2-NEXT:    retl
1061;
1062; X64-AVX-LABEL: fshr_v4i32_shift_by_bitwidth:
1063; X64-AVX:       # %bb.0:
1064; X64-AVX-NEXT:    vmovaps %xmm1, %xmm0
1065; X64-AVX-NEXT:    retq
1066  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
1067  ret <4 x i32> %f
1068}
1069
; Regression test for PR45265: a wide (i88) load followed by 'ashr 40' + truncate is lowered
; with funnel-shift (shrd) instructions; the result feeds a compare and a conditional tail call.
1070%struct.S = type { [11 x i8], i8 }
1071define void @PR45265(i32 %0, ptr nocapture readonly %1) nounwind {
1072; X86-SSE2-LABEL: PR45265:
1073; X86-SSE2:       # %bb.0:
1074; X86-SSE2-NEXT:    pushl %edi
1075; X86-SSE2-NEXT:    pushl %esi
1076; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1077; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1078; X86-SSE2-NEXT:    leal (%eax,%eax,2), %esi
1079; X86-SSE2-NEXT:    movzwl 8(%ecx,%esi,4), %edx
1080; X86-SSE2-NEXT:    movl 4(%ecx,%esi,4), %edi
1081; X86-SSE2-NEXT:    shrdl $8, %edx, %edi
1082; X86-SSE2-NEXT:    xorl %eax, %edi
1083; X86-SSE2-NEXT:    sarl $31, %eax
1084; X86-SSE2-NEXT:    movzbl 10(%ecx,%esi,4), %ecx
1085; X86-SSE2-NEXT:    shll $16, %ecx
1086; X86-SSE2-NEXT:    orl %edx, %ecx
1087; X86-SSE2-NEXT:    shll $8, %ecx
1088; X86-SSE2-NEXT:    movl %ecx, %edx
1089; X86-SSE2-NEXT:    sarl $8, %edx
1090; X86-SSE2-NEXT:    sarl $31, %ecx
1091; X86-SSE2-NEXT:    shldl $24, %edx, %ecx
1092; X86-SSE2-NEXT:    xorl %eax, %ecx
1093; X86-SSE2-NEXT:    orl %ecx, %edi
1094; X86-SSE2-NEXT:    jne .LBB50_1
1095; X86-SSE2-NEXT:  # %bb.2:
1096; X86-SSE2-NEXT:    popl %esi
1097; X86-SSE2-NEXT:    popl %edi
1098; X86-SSE2-NEXT:    jmp _Z3foov # TAILCALL
1099; X86-SSE2-NEXT:  .LBB50_1:
1100; X86-SSE2-NEXT:    popl %esi
1101; X86-SSE2-NEXT:    popl %edi
1102; X86-SSE2-NEXT:    retl
1103;
1104; X64-AVX-LABEL: PR45265:
1105; X64-AVX:       # %bb.0:
1106; X64-AVX-NEXT:    movslq %edi, %rax
1107; X64-AVX-NEXT:    leaq (%rax,%rax,2), %rcx
1108; X64-AVX-NEXT:    movsbq 10(%rsi,%rcx,4), %rdx
1109; X64-AVX-NEXT:    shlq $16, %rdx
1110; X64-AVX-NEXT:    movzwl 8(%rsi,%rcx,4), %edi
1111; X64-AVX-NEXT:    orq %rdx, %rdi
1112; X64-AVX-NEXT:    movq (%rsi,%rcx,4), %rcx
1113; X64-AVX-NEXT:    shrdq $40, %rdi, %rcx
1114; X64-AVX-NEXT:    cmpq %rax, %rcx
1115; X64-AVX-NEXT:    je _Z3foov # TAILCALL
1116; X64-AVX-NEXT:  # %bb.1:
1117; X64-AVX-NEXT:    retq
1118  %3 = sext i32 %0 to i64
1119  %4 = getelementptr inbounds %struct.S, ptr %1, i64 %3
1120  %5 = bitcast ptr %4 to ptr
1121  %6 = load i88, ptr %5, align 1
1122  %7 = ashr i88 %6, 40
1123  %8 = trunc i88 %7 to i64
1124  %9 = icmp eq i64 %8, %3
1125  br i1 %9, label %10, label %11
1126
112710:
1128  tail call void @_Z3foov()
1129  br label %11
1130
113111:
1132  ret void
1133}
1134declare dso_local void @_Z3foov()
1135
; or(fshl(x, y, s), shl(y, s)) does not simplify: both a shll and a shldl are emitted.
1136define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) nounwind {
1137; X86-SSE2-LABEL: or_shl_fshl:
1138; X86-SSE2:       # %bb.0:
1139; X86-SSE2-NEXT:    pushl %esi
1140; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1141; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1142; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1143; X86-SSE2-NEXT:    movl %edx, %esi
1144; X86-SSE2-NEXT:    shll %cl, %esi
1145; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
1146; X86-SSE2-NEXT:    orl %esi, %eax
1147; X86-SSE2-NEXT:    popl %esi
1148; X86-SSE2-NEXT:    retl
1149;
1150; X64-AVX-LABEL: or_shl_fshl:
1151; X64-AVX:       # %bb.0:
1152; X64-AVX-NEXT:    movl %edx, %ecx
1153; X64-AVX-NEXT:    movl %esi, %eax
1154; X64-AVX-NEXT:    shll %cl, %eax
1155; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
1156; X64-AVX-NEXT:    shldl %cl, %esi, %edi
1157; X64-AVX-NEXT:    orl %edi, %eax
1158; X64-AVX-NEXT:    retq
1159  %shy = shl i32 %y, %s
1160  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
1161  %or = or i32 %fun, %shy
1162  ret i32 %or
1163}
1164
; or(rotl(y, s), shl(x, s)): the rotate (fshl with equal operands) becomes roll; both shifts emitted.
1165define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) nounwind {
1166; X86-SSE2-LABEL: or_shl_rotl:
1167; X86-SSE2:       # %bb.0:
1168; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1169; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1170; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1171; X86-SSE2-NEXT:    shll %cl, %edx
1172; X86-SSE2-NEXT:    roll %cl, %eax
1173; X86-SSE2-NEXT:    orl %edx, %eax
1174; X86-SSE2-NEXT:    retl
1175;
1176; X64-AVX-LABEL: or_shl_rotl:
1177; X64-AVX:       # %bb.0:
1178; X64-AVX-NEXT:    movl %edx, %ecx
1179; X64-AVX-NEXT:    movl %esi, %eax
1180; X64-AVX-NEXT:    shll %cl, %edi
1181; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
1182; X64-AVX-NEXT:    roll %cl, %eax
1183; X64-AVX-NEXT:    orl %edi, %eax
1184; X64-AVX-NEXT:    retq
1185  %shx = shl i32 %x, %s
1186  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
1187  %or = or i32 %rot, %shx
1188  ret i32 %or
1189}
1190
; Same as or_shl_fshl but with the 'or' operands commuted; codegen matches the non-commuted version.
1191define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) nounwind {
1192; X86-SSE2-LABEL: or_shl_fshl_commute:
1193; X86-SSE2:       # %bb.0:
1194; X86-SSE2-NEXT:    pushl %esi
1195; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1196; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1197; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1198; X86-SSE2-NEXT:    movl %edx, %esi
1199; X86-SSE2-NEXT:    shll %cl, %esi
1200; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
1201; X86-SSE2-NEXT:    orl %esi, %eax
1202; X86-SSE2-NEXT:    popl %esi
1203; X86-SSE2-NEXT:    retl
1204;
1205; X64-AVX-LABEL: or_shl_fshl_commute:
1206; X64-AVX:       # %bb.0:
1207; X64-AVX-NEXT:    movl %edx, %ecx
1208; X64-AVX-NEXT:    movl %esi, %eax
1209; X64-AVX-NEXT:    shll %cl, %eax
1210; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
1211; X64-AVX-NEXT:    shldl %cl, %esi, %edi
1212; X64-AVX-NEXT:    orl %edi, %eax
1213; X64-AVX-NEXT:    retq
1214  %shy = shl i32 %y, %s
1215  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
1216  %or = or i32 %shy, %fun
1217  ret i32 %or
1218}
1219
; Same as or_shl_rotl but with the 'or' operands commuted; codegen matches the non-commuted version.
1220define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) nounwind {
1221; X86-SSE2-LABEL: or_shl_rotl_commute:
1222; X86-SSE2:       # %bb.0:
1223; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1224; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1225; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1226; X86-SSE2-NEXT:    shll %cl, %edx
1227; X86-SSE2-NEXT:    roll %cl, %eax
1228; X86-SSE2-NEXT:    orl %edx, %eax
1229; X86-SSE2-NEXT:    retl
1230;
1231; X64-AVX-LABEL: or_shl_rotl_commute:
1232; X64-AVX:       # %bb.0:
1233; X64-AVX-NEXT:    movl %edx, %ecx
1234; X64-AVX-NEXT:    movl %esi, %eax
1235; X64-AVX-NEXT:    shll %cl, %edi
1236; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
1237; X64-AVX-NEXT:    roll %cl, %eax
1238; X64-AVX-NEXT:    orl %edi, %eax
1239; X64-AVX-NEXT:    retq
1240  %shx = shl i32 %x, %s
1241  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
1242  %or = or i32 %shx, %rot
1243  ret i32 %or
1244}
1245
; or(fshr(y, x, s), lshr(y, s)) does not simplify: both a shrl and a shrdl are emitted.
1246define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) nounwind {
1247; X86-SSE2-LABEL: or_lshr_fshr:
1248; X86-SSE2:       # %bb.0:
1249; X86-SSE2-NEXT:    pushl %esi
1250; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1251; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1252; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1253; X86-SSE2-NEXT:    movl %edx, %esi
1254; X86-SSE2-NEXT:    shrl %cl, %esi
1255; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
1256; X86-SSE2-NEXT:    orl %esi, %eax
1257; X86-SSE2-NEXT:    popl %esi
1258; X86-SSE2-NEXT:    retl
1259;
1260; X64-AVX-LABEL: or_lshr_fshr:
1261; X64-AVX:       # %bb.0:
1262; X64-AVX-NEXT:    movl %edx, %ecx
1263; X64-AVX-NEXT:    movl %esi, %eax
1264; X64-AVX-NEXT:    shrl %cl, %eax
1265; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
1266; X64-AVX-NEXT:    shrdl %cl, %esi, %edi
1267; X64-AVX-NEXT:    orl %edi, %eax
1268; X64-AVX-NEXT:    retq
1269  %shy = lshr i32 %y, %s
1270  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
1271  %or = or i32 %fun, %shy
1272  ret i32 %or
1273}
1274
; or(rotr(y, s), lshr(x, s)): the rotate (fshr with equal operands) becomes rorl; both shifts emitted.
1275define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) nounwind {
1276; X86-SSE2-LABEL: or_lshr_rotr:
1277; X86-SSE2:       # %bb.0:
1278; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1279; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1280; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1281; X86-SSE2-NEXT:    shrl %cl, %edx
1282; X86-SSE2-NEXT:    rorl %cl, %eax
1283; X86-SSE2-NEXT:    orl %edx, %eax
1284; X86-SSE2-NEXT:    retl
1285;
1286; X64-AVX-LABEL: or_lshr_rotr:
1287; X64-AVX:       # %bb.0:
1288; X64-AVX-NEXT:    movl %edx, %ecx
1289; X64-AVX-NEXT:    movl %esi, %eax
1290; X64-AVX-NEXT:    shrl %cl, %edi
1291; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
1292; X64-AVX-NEXT:    rorl %cl, %eax
1293; X64-AVX-NEXT:    orl %edi, %eax
1294; X64-AVX-NEXT:    retq
1295  %shx = lshr i32 %x, %s
1296  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
1297  %or = or i32 %rot, %shx
1298  ret i32 %or
1299}
1300
; Same as or_lshr_fshr but with the 'or' operands commuted; codegen matches the non-commuted version.
1301define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) nounwind {
1302; X86-SSE2-LABEL: or_lshr_fshr_commute:
1303; X86-SSE2:       # %bb.0:
1304; X86-SSE2-NEXT:    pushl %esi
1305; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1306; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1307; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1308; X86-SSE2-NEXT:    movl %edx, %esi
1309; X86-SSE2-NEXT:    shrl %cl, %esi
1310; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
1311; X86-SSE2-NEXT:    orl %esi, %eax
1312; X86-SSE2-NEXT:    popl %esi
1313; X86-SSE2-NEXT:    retl
1314;
1315; X64-AVX-LABEL: or_lshr_fshr_commute:
1316; X64-AVX:       # %bb.0:
1317; X64-AVX-NEXT:    movl %edx, %ecx
1318; X64-AVX-NEXT:    movl %esi, %eax
1319; X64-AVX-NEXT:    shrl %cl, %eax
1320; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
1321; X64-AVX-NEXT:    shrdl %cl, %esi, %edi
1322; X64-AVX-NEXT:    orl %edi, %eax
1323; X64-AVX-NEXT:    retq
1324  %shy = lshr i32 %y, %s
1325  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
1326  %or = or i32 %shy, %fun
1327  ret i32 %or
1328}
1329
; Same as or_lshr_rotr but with the 'or' operands commuted; codegen matches the non-commuted version.
1330define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind {
1331; X86-SSE2-LABEL: or_lshr_rotr_commute:
1332; X86-SSE2:       # %bb.0:
1333; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1334; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1335; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1336; X86-SSE2-NEXT:    shrl %cl, %edx
1337; X86-SSE2-NEXT:    rorl %cl, %eax
1338; X86-SSE2-NEXT:    orl %edx, %eax
1339; X86-SSE2-NEXT:    retl
1340;
1341; X64-AVX-LABEL: or_lshr_rotr_commute:
1342; X64-AVX:       # %bb.0:
1343; X64-AVX-NEXT:    movl %edx, %ecx
1344; X64-AVX-NEXT:    movl %esi, %eax
1345; X64-AVX-NEXT:    shrl %cl, %edi
1346; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
1347; X64-AVX-NEXT:    rorl %cl, %eax
1348; X64-AVX-NEXT:    orl %edi, %eax
1349; X64-AVX-NEXT:    retq
1350  %shx = lshr i32 %x, %s
1351  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
1352  %or = or i32 %shx, %rot
1353  ret i32 %or
1354}
1355
; or(fshl(y, x, s), shl(y, s)) IS simplified: shl(y, s) is already contained in fshl(y, x, s),
; so only a single shldl is emitted.
1356define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
1357; X86-SSE2-LABEL: or_shl_fshl_simplify:
1358; X86-SSE2:       # %bb.0:
1359; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1360; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1361; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1362; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
1363; X86-SSE2-NEXT:    retl
1364;
1365; X64-AVX-LABEL: or_shl_fshl_simplify:
1366; X64-AVX:       # %bb.0:
1367; X64-AVX-NEXT:    movl %edx, %ecx
1368; X64-AVX-NEXT:    movl %esi, %eax
1369; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
1370; X64-AVX-NEXT:    shldl %cl, %edi, %eax
1371; X64-AVX-NEXT:    retq
1372  %shy = shl i32 %y, %s
1373  %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
1374  %or = or i32 %fun, %shy
1375  ret i32 %or
1376}
1377
; or(lshr(y, s), fshr(x, y, s)) IS simplified: lshr(y, s) is already contained in fshr(x, y, s),
; so only a single shrdl is emitted.
1378define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind {
1379; X86-SSE2-LABEL: or_lshr_fshr_simplify:
1380; X86-SSE2:       # %bb.0:
1381; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1382; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1383; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1384; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
1385; X86-SSE2-NEXT:    retl
1386;
1387; X64-AVX-LABEL: or_lshr_fshr_simplify:
1388; X64-AVX:       # %bb.0:
1389; X64-AVX-NEXT:    movl %edx, %ecx
1390; X64-AVX-NEXT:    movl %esi, %eax
1391; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
1392; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
1393; X64-AVX-NEXT:    retq
1394  %shy = lshr i32 %y, %s
1395  %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
1396  %or = or i32 %shy, %fun
1397  ret i32 %or
1398}
1399