; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2,+sse,+sse2,+avx,+avx2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+sse,+sse2,+avx,+avx2 | FileCheck %s --check-prefixes=CHECK,X64

; If we have a shift by a sign-extended value, we can replace the
; sign-extension with a zero-extension.

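; For instance (an illustrative sketch of the fold, not one of the
; checked tests below; %s, %x, %wide are placeholder names):
;
;   %wide = sext i8 %s to i32          ; can become: zext i8 %s to i32
;   %r = shl i32 %x, %wide
;
; The two extensions only disagree when %s is negative, and then both
; produce a shift amount of 32 or more, which makes the shl poison
; anyway. On every defined execution the amount is in [0, 31], where
; sext and zext of an i8 agree bit for bit.
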
define i32 @t0_shl(i32 %x, i8 %shamt) nounwind {
; X86-LABEL: t0_shl:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t0_shl:
; X64:       # %bb.0:
; X64-NEXT:    shlxl %esi, %edi, %eax
; X64-NEXT:    retq
  %shamt_wide = sext i8 %shamt to i32
  %r = shl i32 %x, %shamt_wide
  ret i32 %r
}
define i32 @t1_lshr(i32 %x, i8 %shamt) nounwind {
; X86-LABEL: t1_lshr:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t1_lshr:
; X64:       # %bb.0:
; X64-NEXT:    shrxl %esi, %edi, %eax
; X64-NEXT:    retq
  %shamt_wide = sext i8 %shamt to i32
  %r = lshr i32 %x, %shamt_wide
  ret i32 %r
}
define i32 @t2_ashr(i32 %x, i8 %shamt) nounwind {
; X86-LABEL: t2_ashr:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    sarxl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t2_ashr:
; X64:       # %bb.0:
; X64-NEXT:    sarxl %esi, %edi, %eax
; X64-NEXT:    retq
  %shamt_wide = sext i8 %shamt to i32
  %r = ashr i32 %x, %shamt_wide
  ret i32 %r
}

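; Vector versions of the same pattern; with AVX2 the widened shift
; amounts feed the per-element variable shifts vpsllvd/vpsrlvd/vpsravd.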
define <4 x i32> @t3_vec_shl(<4 x i32> %x, <4 x i8> %shamt) nounwind {
; CHECK-LABEL: t3_vec_shl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbd %xmm1, %xmm1
; CHECK-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %shamt_wide = sext <4 x i8> %shamt to <4 x i32>
  %r = shl <4 x i32> %x, %shamt_wide
  ret <4 x i32> %r
}
define <4 x i32> @t4_vec_lshr(<4 x i32> %x, <4 x i8> %shamt) nounwind {
; CHECK-LABEL: t4_vec_lshr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbd %xmm1, %xmm1
; CHECK-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %shamt_wide = sext <4 x i8> %shamt to <4 x i32>
  %r = lshr <4 x i32> %x, %shamt_wide
  ret <4 x i32> %r
}
define <4 x i32> @t5_vec_ashr(<4 x i32> %x, <4 x i8> %shamt) nounwind {
; CHECK-LABEL: t5_vec_ashr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbd %xmm1, %xmm1
; CHECK-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %shamt_wide = sext <4 x i8> %shamt to <4 x i32>
  %r = ashr <4 x i32> %x, %shamt_wide
  ret <4 x i32> %r
}

; This fold is not valid for funnel shifts: they take their shift amount
; modulo the bit width, so every amount is well defined and the
; poison-based argument above does not apply.
declare i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
declare i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
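; As an illustration (a hypothetical example, not a checked test; an i4
; amount is used because it is narrower than the 5 bits that survive the
; modulo-32 reduction):
;
;   %wide = sext i4 %s to i32   ; with %s = -1, %wide = 0xFFFFFFFF
;   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %wide)
;
; is well defined and funnel-shifts by 0xFFFFFFFF mod 32 = 31, while the
; zext form would give %wide = 15 and shift by 15. Rewriting the sext
; into a zext would therefore change an observable result. For a plain
; shl, the amount 0xFFFFFFFF would instead be poison, which is exactly
; what justifies the rewrite there.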
define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
; X86-LABEL: n6_fshl:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shldl %cl, %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: n6_fshl:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shldl %cl, %esi, %eax
; X64-NEXT:    retq
  %shamt_wide = sext i8 %shamt to i32
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %shamt_wide)
  ret i32 %r
}
define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
; X86-LABEL: n7_fshr:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shrdl %cl, %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: n7_fshr:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shrdl %cl, %edi, %eax
; X64-NEXT:    retq
  %shamt_wide = sext i8 %shamt to i32
  %r = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %shamt_wide)
  ret i32 %r
}

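; Negative test: %shamt_wide has a second use (it is stored), so turning
; the sext into a zext would change the stored value. Note the movsbl
; (sign extension) that survives in both outputs below.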
define i32 @n8_extrause(i32 %x, i8 %shamt, ptr %shamt_wide_store) nounwind {
; X86-LABEL: n8_extrause:
; X86:       # %bb.0:
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: n8_extrause:
; X64:       # %bb.0:
; X64-NEXT:    movsbl %sil, %eax
; X64-NEXT:    movl %eax, (%rdx)
; X64-NEXT:    shlxl %eax, %edi, %eax
; X64-NEXT:    retq
  %shamt_wide = sext i8 %shamt to i32
  store i32 %shamt_wide, ptr %shamt_wide_store, align 4
  %r = shl i32 %x, %shamt_wide
  ret i32 %r
}