xref: /llvm-project/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
3; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
4
5; x86 does not have a cheap v16i8 shuffle until SSSE3 (pshufb)
6
; Input pattern - reverse the four i32 elements of %v with a shufflevector,
; then bitcast the shuffled value to <16 x i8> (narrower elements).
; Expected output under the SSE2 run - the IR is left exactly as written.
; Expected output under the AVX2 run - the bitcast is applied to %v first and
; the shuffle is performed on <16 x i8>, with each i32 mask index expanded to
; four consecutive i8 indices (3 -> 12,13,14,15 and so on).
7define <16 x i8> @bitcast_shuf_narrow_element(<4 x i32> %v) {
8; SSE-LABEL: @bitcast_shuf_narrow_element(
9; SSE-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
10; SSE-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
11; SSE-NEXT:    ret <16 x i8> [[R]]
12;
13; AVX-LABEL: @bitcast_shuf_narrow_element(
14; AVX-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
15; AVX-NEXT:    [[R:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
16; AVX-NEXT:    ret <16 x i8> [[R]]
17;
18  %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
19  %r = bitcast <4 x i32> %shuf to <16 x i8>
20  ret <16 x i8> %r
21}
22
23; v4f32 is the same cost as v4i32, so this always works
24
; Input pattern - reverse the four i32 elements of %v, then bitcast the result
; to <4 x float> (same element count and element width).
; Expected output for both run lines (shared CHECK prefix) - the bitcast is
; applied to %v first and the shuffle is done on <4 x float> with the same
; <3, 2, 1, 0> mask.
25define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) {
26; CHECK-LABEL: @bitcast_shuf_same_size(
27; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <4 x float>
28; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
29; CHECK-NEXT:    ret <4 x float> [[R]]
30;
31  %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
32  %r = bitcast <4 x i32> %shuf to <4 x float>
33  ret <4 x float> %r
34}
35
36; Length-changing shuffles
37
; Input pattern - a length-changing shuffle that builds a <4 x i32> from the
; two elements of %v using mask <1, 0, 1, 0>, followed by a bitcast to
; <16 x i8>.
; Expected output under the SSE2 run - the IR is left exactly as written.
; Expected output under the AVX2 run - %v is bitcast to <8 x i8> first and the
; shuffle produces the <16 x i8> result directly, with each i32 mask index
; expanded to four consecutive i8 indices.
38define <16 x i8> @bitcast_shuf_narrow_element_subvector(<2 x i32> %v) {
39; SSE-LABEL: @bitcast_shuf_narrow_element_subvector(
40; SSE-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
41; SSE-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
42; SSE-NEXT:    ret <16 x i8> [[R]]
43;
44; AVX-LABEL: @bitcast_shuf_narrow_element_subvector(
45; AVX-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
46; AVX-NEXT:    [[R:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
47; AVX-NEXT:    ret <16 x i8> [[R]]
48;
49  %shuf = shufflevector <2 x i32> %v, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
50  %r = bitcast <4 x i32> %shuf to <16 x i8>
51  ret <16 x i8> %r
52}
53
; Input pattern - a length-changing shuffle with mask <0, 1, 0, 1> that
; repeats the two i64 elements of %v to form a <4 x i64>, followed by a
; bitcast to <16 x i16>.
; Expected output under the SSE2 run - the IR is left exactly as written.
; Expected output under the AVX2 run - %v is bitcast to <8 x i16> first and
; the shuffle produces the <16 x i16> result with mask 0..7 followed by 0..7
; (each i64 index expanded to four consecutive i16 indices).
54define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors(<2 x i64> %v) {
55; SSE-LABEL: @bitcast_shuf_narrow_element_concat_subvectors(
56; SSE-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i64> [[V:%.*]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
57; SSE-NEXT:    [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <16 x i16>
58; SSE-NEXT:    ret <16 x i16> [[R]]
59;
60; AVX-LABEL: @bitcast_shuf_narrow_element_concat_subvectors(
61; AVX-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16>
62; AVX-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
63; AVX-NEXT:    ret <16 x i16> [[R]]
64;
65  %shuf = shufflevector <2 x i64> %v, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
66  %r = bitcast <4 x i64> %shuf to <16 x i16>
67  ret <16 x i16> %r
68}
69
; Input pattern - a length-changing shuffle that extracts elements 4..7 (the
; upper half) of the <8 x i32> %v, followed by a bitcast to <16 x i8>.
; Expected output for both run lines (shared CHECK prefix) - %v is bitcast to
; <32 x i8> first and the shuffle selects bytes 16..31 directly.
70define <16 x i8> @bitcast_shuf_extract_subvector(<8 x i32> %v) {
71; CHECK-LABEL: @bitcast_shuf_extract_subvector(
72; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i32> [[V:%.*]] to <32 x i8>
73; CHECK-NEXT:    [[R:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
74; CHECK-NEXT:    ret <16 x i8> [[R]]
75;
76  %shuf = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
77  %r = bitcast <4 x i32> %shuf to <16 x i8>
78  ret <16 x i8> %r
79}
80
81; Negative test - must cast to vector type
82
; Input pattern - reverse the four i32 elements of %v, then bitcast the
; shuffled vector to the scalar type i128.
; Expected output for both run lines (shared CHECK prefix) - the IR is left
; exactly as written, since the bitcast destination is not a vector.
83define i128 @bitcast_shuf_narrow_element_wrong_type(<4 x i32> %v) {
84; CHECK-LABEL: @bitcast_shuf_narrow_element_wrong_type(
85; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
86; CHECK-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to i128
87; CHECK-NEXT:    ret i128 [[R]]
88;
89  %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
90  %r = bitcast <4 x i32> %shuf to i128
91  ret i128 %r
92}
93
94; Widen shuffle elements
95
; Input pattern - shuffle the <8 x i16> %v with mask <0,1,0,1,2,3,2,3>, which
; moves adjacent i16 pairs together, then bitcast the result to <4 x i32>
; (wider elements).
; Expected output for both run lines (shared CHECK prefix) - %v is bitcast to
; <4 x i32> first and the shuffle uses the widened mask <0, 0, 1, 1>, where
; each pair of i16 indices collapses into one i32 index.
96define <4 x i32> @bitcast_shuf_wide_element(<8 x i16> %v) {
97; CHECK-LABEL: @bitcast_shuf_wide_element(
98; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <4 x i32>
99; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
100; CHECK-NEXT:    ret <4 x i32> [[R]]
101;
102  %shuf = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
103  %r = bitcast <8 x i16> %shuf to <4 x i32>
104  ret <4 x i32> %r
105}
106
; External function with no body in this file, taking a <4 x i32>. It is
; called from @bitcast_shuf_uses to give the shuffle result a second user.
107declare void @use(<4 x i32>)
108
109; Negative test - don't create an extra shuffle
110
; Input pattern - reverse the four i32 elements of %v, pass the shuffled
; value to @use, and also bitcast it to <16 x i8>. The shuffle therefore has
; two users.
; Expected output for both run lines (shared CHECK prefix) - the IR is left
; exactly as written, with the original shuffle feeding both the call and the
; bitcast.
111define <16 x i8> @bitcast_shuf_uses(<4 x i32> %v) {
112; CHECK-LABEL: @bitcast_shuf_uses(
113; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
114; CHECK-NEXT:    call void @use(<4 x i32> [[SHUF]])
115; CHECK-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
116; CHECK-NEXT:    ret <16 x i8> [[R]]
117;
118  %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
119  call void @use(<4 x i32> %shuf)
120  %r = bitcast <4 x i32> %shuf to <16 x i8>
121  ret <16 x i8> %r
122}
123
; Input pattern - a chain of casts and shuffles around a per-byte shift.
; %v is bitcast to <4 x i32> and reversed, bitcast to <16 x i8>, shifted left
; by 1 in every byte, bitcast back to <4 x i32>, reversed again, and finally
; bitcast to <2 x i64>. The value named %add is produced by a shl.
; Expected output under the SSE2 run - the same instruction sequence is kept
; (the shift amount is printed as a splat constant).
; Expected output under the AVX2 run - the first bitcast/shuffle/bitcast
; group becomes one bitcast of %v to <16 x i8> followed by a byte-level
; shuffle; the second <4 x i32> shuffle and the casts around it are kept.
; NOTE(review) the name presumably refers to LLVM bug report 35454 - confirm.
124define <2 x i64> @PR35454_1(<2 x i64> %v) {
125; SSE-LABEL: @PR35454_1(
126; SSE-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
127; SSE-NEXT:    [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
128; SSE-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <16 x i8>
129; SSE-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1)
130; SSE-NEXT:    [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32>
131; SSE-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
132; SSE-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
133; SSE-NEXT:    ret <2 x i64> [[BC3]]
134;
135; AVX-LABEL: @PR35454_1(
136; AVX-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8>
137; AVX-NEXT:    [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
138; AVX-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1)
139; AVX-NEXT:    [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32>
140; AVX-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
141; AVX-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
142; AVX-NEXT:    ret <2 x i64> [[BC3]]
143;
144  %bc = bitcast <2 x i64> %v to <4 x i32>
145  %permil = shufflevector <4 x i32> %bc, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
146  %bc1 = bitcast <4 x i32> %permil to <16 x i8>
147  %add = shl <16 x i8> %bc1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
148  %bc2 = bitcast <16 x i8> %add to <4 x i32>
149  %permil1 = shufflevector <4 x i32> %bc2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
150  %bc3 = bitcast <4 x i32> %permil1 to <2 x i64>
151  ret <2 x i64> %bc3
152}
153
; Input pattern - a chain of casts and shuffles around a per-i16 shift.
; %v is bitcast to <4 x i32> and reversed, bitcast to <8 x i16>, shifted left
; by 1 in every i16 element, bitcast back to <4 x i32>, reversed again, and
; finally bitcast to <2 x i64>. The value named %add is produced by a shl.
; Expected output under the SSE2 run - the same instruction sequence is kept
; (the shift amount is printed as a splat constant).
; Expected output under the AVX2 run - the first bitcast/shuffle/bitcast
; group becomes one bitcast of %v to <8 x i16> followed by an i16-level
; shuffle with mask <6,7,4,5,2,3,0,1>; the second <4 x i32> shuffle and the
; casts around it are kept.
; NOTE(review) the name presumably refers to LLVM bug report 35454 - confirm.
154define <2 x i64> @PR35454_2(<2 x i64> %v) {
155; SSE-LABEL: @PR35454_2(
156; SSE-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
157; SSE-NEXT:    [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
158; SSE-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <8 x i16>
159; SSE-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1)
160; SSE-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32>
161; SSE-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
162; SSE-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
163; SSE-NEXT:    ret <2 x i64> [[BC3]]
164;
165; AVX-LABEL: @PR35454_2(
166; AVX-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16>
167; AVX-NEXT:    [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
168; AVX-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1)
169; AVX-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32>
170; AVX-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
171; AVX-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
172; AVX-NEXT:    ret <2 x i64> [[BC3]]
173;
174  %bc = bitcast <2 x i64> %v to <4 x i32>
175  %permil = shufflevector <4 x i32> %bc, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
176  %bc1 = bitcast <4 x i32> %permil to <8 x i16>
177  %add = shl <8 x i16> %bc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
178  %bc2 = bitcast <8 x i16> %add to <4 x i32>
179  %permil1 = shufflevector <4 x i32> %bc2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
180  %bc3 = bitcast <4 x i32> %permil1 to <2 x i64>
181  ret <2 x i64> %bc3
182}
183