; xref: /llvm-project/llvm/test/CodeGen/X86/fshr.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-FAST
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=X86,X86-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=X64,X64-SLOW

declare i8 @llvm.fshr.i8(i8, i8, i8) nounwind readnone
declare i16 @llvm.fshr.i16(i16, i16, i16) nounwind readnone
declare i32 @llvm.fshr.i32(i32, i32, i32) nounwind readnone
declare i64 @llvm.fshr.i64(i64, i64, i64) nounwind readnone
declare i128 @llvm.fshr.i128(i128, i128, i128) nounwind readnone

;
; Variable Funnel Shift
;

define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind {
; X86-LABEL: var_shift_i8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    andb $7, %cl
; X86-NEXT:    shrl %cl, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: var_shift_i8:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    shll $8, %edi
; X64-NEXT:    movzbl %sil, %eax
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    andb $7, %cl
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shrl %cl, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %tmp = tail call i8 @llvm.fshr.i8(i8 %x, i8 %y, i8 %z)
  ret i8 %tmp
}

define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
; X86-FAST-LABEL: var_shift_i16:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    andb $15, %cl
; X86-FAST-NEXT:    shrdw %cl, %dx, %ax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: var_shift_i16:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    shll $16, %eax
; X86-SLOW-NEXT:    orl %edx, %eax
; X86-SLOW-NEXT:    andb $15, %cl
; X86-SLOW-NEXT:    shrl %cl, %eax
; X86-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: var_shift_i16:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl %edx, %ecx
; X64-FAST-NEXT:    movl %esi, %eax
; X64-FAST-NEXT:    andb $15, %cl
; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-FAST-NEXT:    shrdw %cl, %di, %ax
; X64-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: var_shift_i16:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movl %edx, %ecx
; X64-SLOW-NEXT:    shll $16, %edi
; X64-SLOW-NEXT:    movzwl %si, %eax
; X64-SLOW-NEXT:    orl %edi, %eax
; X64-SLOW-NEXT:    andb $15, %cl
; X64-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT:    shrl %cl, %eax
; X64-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
  ret i16 %tmp
}

define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-FAST-LABEL: var_shift_i32:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    shrdl %cl, %edx, %eax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: var_shift_i32:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    shrl %cl, %edx
; X86-SLOW-NEXT:    notb %cl
; X86-SLOW-NEXT:    addl %eax, %eax
; X86-SLOW-NEXT:    shll %cl, %eax
; X86-SLOW-NEXT:    orl %edx, %eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: var_shift_i32:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl %edx, %ecx
; X64-FAST-NEXT:    movl %esi, %eax
; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-FAST-NEXT:    shrdl %cl, %edi, %eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: var_shift_i32:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movl %edx, %ecx
; X64-SLOW-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-SLOW-NEXT:    shrl %cl, %esi
; X64-SLOW-NEXT:    leal (%rdi,%rdi), %eax
; X64-SLOW-NEXT:    notb %cl
; X64-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT:    shll %cl, %eax
; X64-SLOW-NEXT:    orl %esi, %eax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %tmp
}

define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
; X86-LABEL: var_shift_i32_optsize:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrdl %cl, %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: var_shift_i32_optsize:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shrdl %cl, %edi, %eax
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %tmp
}

define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
; X86-LABEL: var_shift_i32_pgso:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrdl %cl, %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: var_shift_i32_pgso:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shrdl %cl, %edi, %eax
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %tmp
}

define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-FAST-LABEL: var_shift_i64:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    pushl %esi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    testb $32, %cl
; X86-FAST-NEXT:    je .LBB5_1
; X86-FAST-NEXT:  # %bb.2:
; X86-FAST-NEXT:    movl %esi, %edx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT:    jmp .LBB5_3
; X86-FAST-NEXT:  .LBB5_1:
; X86-FAST-NEXT:    movl %eax, %edx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:  .LBB5_3:
; X86-FAST-NEXT:    shrdl %cl, %edx, %eax
; X86-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-FAST-NEXT:    shrdl %cl, %esi, %edx
; X86-FAST-NEXT:    popl %esi
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: var_shift_i64:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    pushl %ebx
; X86-SLOW-NEXT:    pushl %edi
; X86-SLOW-NEXT:    pushl %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT:    testb $32, %bl
; X86-SLOW-NEXT:    je .LBB5_1
; X86-SLOW-NEXT:  # %bb.2:
; X86-SLOW-NEXT:    movl %edx, %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    jmp .LBB5_3
; X86-SLOW-NEXT:  .LBB5_1:
; X86-SLOW-NEXT:    movl %eax, %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:  .LBB5_3:
; X86-SLOW-NEXT:    leal (%esi,%esi), %edi
; X86-SLOW-NEXT:    movb %bl, %ch
; X86-SLOW-NEXT:    notb %ch
; X86-SLOW-NEXT:    movb %ch, %cl
; X86-SLOW-NEXT:    shll %cl, %edi
; X86-SLOW-NEXT:    movb %bl, %cl
; X86-SLOW-NEXT:    shrl %cl, %eax
; X86-SLOW-NEXT:    orl %edi, %eax
; X86-SLOW-NEXT:    shrl %cl, %esi
; X86-SLOW-NEXT:    addl %edx, %edx
; X86-SLOW-NEXT:    movb %ch, %cl
; X86-SLOW-NEXT:    shll %cl, %edx
; X86-SLOW-NEXT:    orl %esi, %edx
; X86-SLOW-NEXT:    popl %esi
; X86-SLOW-NEXT:    popl %edi
; X86-SLOW-NEXT:    popl %ebx
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: var_shift_i64:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movq %rdx, %rcx
; X64-FAST-NEXT:    movq %rsi, %rax
; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-FAST-NEXT:    shrdq %cl, %rdi, %rax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: var_shift_i64:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movq %rdx, %rcx
; X64-SLOW-NEXT:    shrq %cl, %rsi
; X64-SLOW-NEXT:    leaq (%rdi,%rdi), %rax
; X64-SLOW-NEXT:    notb %cl
; X64-SLOW-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-SLOW-NEXT:    shlq %cl, %rax
; X64-SLOW-NEXT:    orq %rsi, %rax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %tmp
}

define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-FAST-LABEL: var_shift_i128:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    pushl %ebp
; X86-FAST-NEXT:    pushl %ebx
; X86-FAST-NEXT:    pushl %edi
; X86-FAST-NEXT:    pushl %esi
; X86-FAST-NEXT:    pushl %eax
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    testb $64, %cl
; X86-FAST-NEXT:    je .LBB6_1
; X86-FAST-NEXT:  # %bb.2:
; X86-FAST-NEXT:    movl %edx, (%esp) # 4-byte Spill
; X86-FAST-NEXT:    movl %edi, %edx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-FAST-NEXT:    movl %esi, %ebp
; X86-FAST-NEXT:    movl %ebx, %esi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-FAST-NEXT:    testb $32, %cl
; X86-FAST-NEXT:    je .LBB6_4
; X86-FAST-NEXT:    jmp .LBB6_5
; X86-FAST-NEXT:  .LBB6_1:
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-FAST-NEXT:    movl %ebp, (%esp) # 4-byte Spill
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-FAST-NEXT:    testb $32, %cl
; X86-FAST-NEXT:    jne .LBB6_5
; X86-FAST-NEXT:  .LBB6_4:
; X86-FAST-NEXT:    movl %edi, %ebx
; X86-FAST-NEXT:    movl %esi, %edi
; X86-FAST-NEXT:    movl %edx, %esi
; X86-FAST-NEXT:    movl %ebp, %edx
; X86-FAST-NEXT:    movl (%esp), %ebp # 4-byte Reload
; X86-FAST-NEXT:  .LBB6_5:
; X86-FAST-NEXT:    shrdl %cl, %edx, %ebp
; X86-FAST-NEXT:    shrdl %cl, %esi, %edx
; X86-FAST-NEXT:    shrdl %cl, %edi, %esi
; X86-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-FAST-NEXT:    shrdl %cl, %ebx, %edi
; X86-FAST-NEXT:    movl %edi, 12(%eax)
; X86-FAST-NEXT:    movl %esi, 8(%eax)
; X86-FAST-NEXT:    movl %edx, 4(%eax)
; X86-FAST-NEXT:    movl %ebp, (%eax)
; X86-FAST-NEXT:    addl $4, %esp
; X86-FAST-NEXT:    popl %esi
; X86-FAST-NEXT:    popl %edi
; X86-FAST-NEXT:    popl %ebx
; X86-FAST-NEXT:    popl %ebp
; X86-FAST-NEXT:    retl $4
;
; X86-SLOW-LABEL: var_shift_i128:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    pushl %ebp
; X86-SLOW-NEXT:    pushl %ebx
; X86-SLOW-NEXT:    pushl %edi
; X86-SLOW-NEXT:    pushl %esi
; X86-SLOW-NEXT:    subl $8, %esp
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    testb $64, %cl
; X86-SLOW-NEXT:    je .LBB6_1
; X86-SLOW-NEXT:  # %bb.2:
; X86-SLOW-NEXT:    movl %ebp, %eax
; X86-SLOW-NEXT:    movl %ebx, %ebp
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT:    movl %edi, %edx
; X86-SLOW-NEXT:    movl %esi, %edi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT:    testb $32, %cl
; X86-SLOW-NEXT:    jne .LBB6_5
; X86-SLOW-NEXT:  .LBB6_4:
; X86-SLOW-NEXT:    movl %ebx, %esi
; X86-SLOW-NEXT:    movl %edi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT:    movl %ebp, %edi
; X86-SLOW-NEXT:    movl %edx, %ebp
; X86-SLOW-NEXT:    movl %eax, %edx
; X86-SLOW-NEXT:    jmp .LBB6_6
; X86-SLOW-NEXT:  .LBB6_1:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    testb $32, %cl
; X86-SLOW-NEXT:    je .LBB6_4
; X86-SLOW-NEXT:  .LBB6_5:
; X86-SLOW-NEXT:    movl %ebx, (%esp) # 4-byte Spill
; X86-SLOW-NEXT:  .LBB6_6:
; X86-SLOW-NEXT:    shrl %cl, %edx
; X86-SLOW-NEXT:    movl %ecx, %ebx
; X86-SLOW-NEXT:    notb %bl
; X86-SLOW-NEXT:    leal (%ebp,%ebp), %eax
; X86-SLOW-NEXT:    movl %ebx, %ecx
; X86-SLOW-NEXT:    shll %cl, %eax
; X86-SLOW-NEXT:    orl %edx, %eax
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT:    shrl %cl, %ebp
; X86-SLOW-NEXT:    leal (%edi,%edi), %edx
; X86-SLOW-NEXT:    movl %ebx, %ecx
; X86-SLOW-NEXT:    shll %cl, %edx
; X86-SLOW-NEXT:    orl %ebp, %edx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT:    shrl %cl, %edi
; X86-SLOW-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT:    movl (%esp), %edi # 4-byte Reload
; X86-SLOW-NEXT:    leal (%edi,%edi), %ebp
; X86-SLOW-NEXT:    movl %ebx, %ecx
; X86-SLOW-NEXT:    shll %cl, %ebp
; X86-SLOW-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT:    shrl %cl, %edi
; X86-SLOW-NEXT:    addl %esi, %esi
; X86-SLOW-NEXT:    movl %ebx, %ecx
; X86-SLOW-NEXT:    shll %cl, %esi
; X86-SLOW-NEXT:    orl %edi, %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    movl %esi, 12(%ecx)
; X86-SLOW-NEXT:    movl %ebp, 8(%ecx)
; X86-SLOW-NEXT:    movl %edx, 4(%ecx)
; X86-SLOW-NEXT:    movl %eax, (%ecx)
; X86-SLOW-NEXT:    movl %ecx, %eax
; X86-SLOW-NEXT:    addl $8, %esp
; X86-SLOW-NEXT:    popl %esi
; X86-SLOW-NEXT:    popl %edi
; X86-SLOW-NEXT:    popl %ebx
; X86-SLOW-NEXT:    popl %ebp
; X86-SLOW-NEXT:    retl $4
;
; X64-FAST-LABEL: var_shift_i128:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movq %rdx, %rax
; X64-FAST-NEXT:    testb $64, %r8b
; X64-FAST-NEXT:    cmoveq %rdi, %rsi
; X64-FAST-NEXT:    cmoveq %rcx, %rdi
; X64-FAST-NEXT:    cmovneq %rcx, %rax
; X64-FAST-NEXT:    movl %r8d, %ecx
; X64-FAST-NEXT:    shrdq %cl, %rdi, %rax
; X64-FAST-NEXT:    shrdq %cl, %rsi, %rdi
; X64-FAST-NEXT:    movq %rdi, %rdx
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: var_shift_i128:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    testb $64, %r8b
; X64-SLOW-NEXT:    cmoveq %rdi, %rsi
; X64-SLOW-NEXT:    cmoveq %rcx, %rdi
; X64-SLOW-NEXT:    cmovneq %rcx, %rdx
; X64-SLOW-NEXT:    movl %r8d, %ecx
; X64-SLOW-NEXT:    shrq %cl, %rdx
; X64-SLOW-NEXT:    leaq (%rdi,%rdi), %rax
; X64-SLOW-NEXT:    movl %r8d, %r9d
; X64-SLOW-NEXT:    notb %r9b
; X64-SLOW-NEXT:    movl %r9d, %ecx
; X64-SLOW-NEXT:    shlq %cl, %rax
; X64-SLOW-NEXT:    orq %rdx, %rax
; X64-SLOW-NEXT:    movl %r8d, %ecx
; X64-SLOW-NEXT:    shrq %cl, %rdi
; X64-SLOW-NEXT:    leaq (%rsi,%rsi), %rdx
; X64-SLOW-NEXT:    movl %r9d, %ecx
; X64-SLOW-NEXT:    shlq %cl, %rdx
; X64-SLOW-NEXT:    orq %rdi, %rdx
; X64-SLOW-NEXT:    retq
  %tmp = tail call i128 @llvm.fshr.i128(i128 %x, i128 %y, i128 %z)
  ret i128 %tmp
}

;
; Const Funnel Shift
;

define i8 @const_shift_i8(i8 %x, i8 %y) nounwind {
; X86-LABEL: const_shift_i8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shrb $7, %cl
; X86-NEXT:    addb %al, %al
; X86-NEXT:    orb %cl, %al
; X86-NEXT:    retl
;
; X64-LABEL: const_shift_i8:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    shrb $7, %sil
; X64-NEXT:    leal (%rdi,%rdi), %eax
; X64-NEXT:    orb %sil, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %tmp = tail call i8 @llvm.fshr.i8(i8 %x, i8 %y, i8 7)
  ret i8 %tmp
}

define i16 @const_shift_i16(i16 %x, i16 %y) nounwind {
; X86-FAST-LABEL: const_shift_i16:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    shrdw $7, %cx, %ax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: const_shift_i16:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    shrl $7, %ecx
; X86-SLOW-NEXT:    shll $9, %eax
; X86-SLOW-NEXT:    orl %ecx, %eax
; X86-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: const_shift_i16:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl %esi, %eax
; X64-FAST-NEXT:    shrdw $7, %di, %ax
; X64-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: const_shift_i16:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movzwl %si, %eax
; X64-SLOW-NEXT:    shll $9, %edi
; X64-SLOW-NEXT:    shrl $7, %eax
; X64-SLOW-NEXT:    orl %edi, %eax
; X64-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 7)
  ret i16 %tmp
}

define i32 @const_shift_i32(i32 %x, i32 %y) nounwind {
; X86-FAST-LABEL: const_shift_i32:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    shrdl $7, %ecx, %eax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: const_shift_i32:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    shrl $7, %ecx
; X86-SLOW-NEXT:    shll $25, %eax
; X86-SLOW-NEXT:    orl %ecx, %eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: const_shift_i32:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl %edi, %eax
; X64-FAST-NEXT:    shldl $25, %esi, %eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: const_shift_i32:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-SLOW-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-SLOW-NEXT:    shrl $7, %esi
; X64-SLOW-NEXT:    shll $25, %edi
; X64-SLOW-NEXT:    leal (%rdi,%rsi), %eax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 7)
  ret i32 %tmp
}

define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
; X86-FAST-LABEL: const_shift_i64:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    shldl $25, %ecx, %edx
; X86-FAST-NEXT:    shrdl $7, %ecx, %eax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: const_shift_i64:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    pushl %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT:    shrl $7, %ecx
; X86-SLOW-NEXT:    movl %esi, %eax
; X86-SLOW-NEXT:    shll $25, %eax
; X86-SLOW-NEXT:    orl %ecx, %eax
; X86-SLOW-NEXT:    shrl $7, %esi
; X86-SLOW-NEXT:    shll $25, %edx
; X86-SLOW-NEXT:    orl %esi, %edx
; X86-SLOW-NEXT:    popl %esi
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: const_shift_i64:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movq %rdi, %rax
; X64-FAST-NEXT:    shldq $57, %rsi, %rax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: const_shift_i64:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    shrq $7, %rsi
; X64-SLOW-NEXT:    shlq $57, %rdi
; X64-SLOW-NEXT:    leaq (%rdi,%rsi), %rax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 7)
  ret i64 %tmp
}

;
; Combine Consecutive Loads
;

define i8 @combine_fshr_load_i8(ptr %p) nounwind {
; X86-LABEL: combine_fshr_load_i8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshr_load_i8:
; X64:       # %bb.0:
; X64-NEXT:    movzbl (%rdi), %eax
; X64-NEXT:    retq
  %p1 = getelementptr i8, ptr %p, i32 1
  %ld0 = load i8, ptr%p
  %ld1 = load i8, ptr%p1
  %res = call i8 @llvm.fshr.i8(i8 %ld1, i8 %ld0, i8 8)
  ret i8 %res
}

define i16 @combine_fshr_load_i16(ptr %p) nounwind {
; X86-LABEL: combine_fshr_load_i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl 1(%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshr_load_i16:
; X64:       # %bb.0:
; X64-NEXT:    movzwl 1(%rdi), %eax
; X64-NEXT:    retq
  %p1 = getelementptr i16, ptr %p, i32 1
  %ld0 = load i16, ptr%p
  %ld1 = load i16, ptr%p1
  %res = call i16 @llvm.fshr.i16(i16 %ld1, i16 %ld0, i16 8)
  ret i16 %res
}

define i32 @combine_fshr_load_i32(ptr %p) nounwind {
; X86-LABEL: combine_fshr_load_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl 9(%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshr_load_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl 9(%rdi), %eax
; X64-NEXT:    retq
  %p0 = getelementptr i32, ptr %p, i32 2
  %p1 = getelementptr i32, ptr %p, i32 3
  %ld0 = load i32, ptr%p0
  %ld1 = load i32, ptr%p1
  %res = call i32 @llvm.fshr.i32(i32 %ld1, i32 %ld0, i32 8)
  ret i32 %res
}

define i64 @combine_fshr_load_i64(ptr %p) nounwind {
; X86-LABEL: combine_fshr_load_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl 11(%ecx), %eax
; X86-NEXT:    movl 15(%ecx), %edx
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshr_load_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq 11(%rdi), %rax
; X64-NEXT:    retq
  %p0 = getelementptr i64, ptr %p, i64 1
  %p1 = getelementptr i64, ptr %p, i64 2
  %ld0 = load i64, ptr%p0
  %ld1 = load i64, ptr%p1
  %res = call i64 @llvm.fshr.i64(i64 %ld1, i64 %ld0, i64 24)
  ret i64 %res
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}
