xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll (revision 08d14e10ca4bdd4626cbe1c893961416f9703d5c)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=slp-vectorizer,instcombine -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s
3
; test1: an <8 x i32> load is split into two halves with shufflevector, each
; half is zero-extended to <4 x i64>, each widened half is split again into
; <2 x i64> pieces, and the four pieces are stored to %out at i64 element
; offsets 0, 2, 4 and 6 in their original order.
; The CHECK lines expect a single <8 x i32> load, a single <8 x i32> to
; <8 x i64> zext and a single <8 x i64> store, with no shufflevector left.
4define void @test1(ptr %in, ptr %out) {
5; CHECK-LABEL: @test1(
6; CHECK-NEXT:  entry:
7; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1
8; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i32> [[TMP0]] to <8 x i64>
9; CHECK-NEXT:    store <8 x i64> [[TMP1]], ptr [[OUT:%.*]], align 8
10; CHECK-NEXT:    ret void
11;
12entry:
13  %0 = load <8 x i32>, ptr %in, align 1
  ; Low (lanes 0-3) and high (lanes 4-7) halves of the loaded vector.
14  %1 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15  %2 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
16  %3 = zext <4 x i32> %1 to <4 x i64>
17  %4 = zext <4 x i32> %2 to <4 x i64>
  ; Two-lane quarters of the widened data, taken in source order.
18  %5 = shufflevector <4 x i64> %3, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
19  %6 = shufflevector <4 x i64> %3, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
20  %7 = shufflevector <4 x i64> %4, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
21  %8 = shufflevector <4 x i64> %4, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
  ; Four adjacent <2 x i64> slots in %out.
22  %9 = getelementptr inbounds i64, ptr %out, i64 0
23  %10 = getelementptr inbounds i64, ptr %out, i64 2
24  %11 = getelementptr inbounds i64, ptr %out, i64 4
25  %12 = getelementptr inbounds i64, ptr %out, i64 6
26  store <2 x i64> %5, ptr %9, align 8
27  store <2 x i64> %6, ptr %10, align 8
28  store <2 x i64> %7, ptr %11, align 8
29  store <2 x i64> %8, ptr %12, align 8
30  ret void
31}
32
; test2: same load / split / zext / split / store chain as a plain in-order
; copy, except that the first two <2 x i64> pieces are swapped: lanes 2-3 of
; the widened low half go to element offset 0 and lanes 0-1 go to offset 2.
; The CHECK lines expect one <8 x i32> load, one <8 x i64> zext, a single
; shufflevector with mask <2, 3, 0, 1, 4, 5, 6, 7> that carries the swap, and
; one <8 x i64> store.
33define void @test2(ptr %in, ptr %out) {
34; CHECK-LABEL: @test2(
35; CHECK-NEXT:  entry:
36; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1
37; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i32> [[TMP0]] to <8 x i64>
38; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
39; CHECK-NEXT:    store <8 x i64> [[TMP2]], ptr [[OUT:%.*]], align 8
40; CHECK-NEXT:    ret void
41;
42entry:
43  %0 = load <8 x i32>, ptr %in, align 1
44  %1 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
45  %2 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
46  %3 = zext <4 x i32> %1 to <4 x i64>
47  %4 = zext <4 x i32> %2 to <4 x i64>
  ; %5 takes lanes 2-3 and %6 takes lanes 0-1 of %3: the out-of-order pair.
48  %5 = shufflevector <4 x i64> %3, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
49  %6 = shufflevector <4 x i64> %3, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
50  %7 = shufflevector <4 x i64> %4, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
51  %8 = shufflevector <4 x i64> %4, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
52  %9 = getelementptr inbounds i64, ptr %out, i64 0
53  %10 = getelementptr inbounds i64, ptr %out, i64 2
54  %11 = getelementptr inbounds i64, ptr %out, i64 4
55  %12 = getelementptr inbounds i64, ptr %out, i64 6
56  store <2 x i64> %5, ptr %9, align 8
57  store <2 x i64> %6, ptr %10, align 8
58  store <2 x i64> %7, ptr %11, align 8
59  store <2 x i64> %8, ptr %12, align 8
60  ret void
61}
62
; test3: four <4 x i32> sub-vectors are extracted from a <16 x i32> argument
; in reverse order (lanes 12-15, 8-11, 4-7, 0-3) and stored to %out at i32
; element offsets 0, 4, 8 and 12.
; The CHECK lines expect the four extracts to become one <16 x i32>
; shufflevector whose mask lists the same lane order, followed by one
; <16 x i32> store.
63define void @test3(<16 x i32> %0, ptr %out) {
64; CHECK-LABEL: @test3(
65; CHECK-NEXT:  entry:
66; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0:%.*]], <16 x i32> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
67; CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
68; CHECK-NEXT:    ret void
69;
70entry:
  ; Sub-vectors are taken from the highest lanes down to the lowest.
71  %1 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
72  %2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
73  %3 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
74  %4 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
75  %5 = getelementptr inbounds i32, ptr %out, i64 0
76  %6 = getelementptr inbounds i32, ptr %out, i64 4
77  %7 = getelementptr inbounds i32, ptr %out, i64 8
78  %8 = getelementptr inbounds i32, ptr %out, i64 12
79  store <4 x i32> %1, ptr %5, align 4
80  store <4 x i32> %2, ptr %6, align 4
81  store <4 x i32> %3, ptr %7, align 4
82  store <4 x i32> %4, ptr %8, align 4
83  ret void
84}
85
; test4: the low and high <4 x i32> halves of an <8 x i32> load are each
; stored twice, in the order low, high, low, high, to %out at i32 element
; offsets 0, 4, 8 and 12.
; The CHECK lines expect one <8 x i32> load, one shufflevector that widens it
; to <16 x i32> by listing lanes 0-7 twice, and one <16 x i32> store.
86define void @test4(ptr %in, ptr %out) {
87; CHECK-LABEL: @test4(
88; CHECK-NEXT:  entry:
89; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 4
90; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
91; CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
92; CHECK-NEXT:    ret void
93;
94entry:
95  %0 = load <8 x i32>, ptr %in, align 4
96  %1 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
97  %2 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
98  %3 = getelementptr inbounds i32, ptr %out, i64 0
99  %4 = getelementptr inbounds i32, ptr %out, i64 4
100  %5 = getelementptr inbounds i32, ptr %out, i64 8
101  %6 = getelementptr inbounds i32, ptr %out, i64 12
  ; %1 and %2 are each used by two stores, so the stored data repeats.
102  store <4 x i32> %1, ptr %3, align 4
103  store <4 x i32> %2, ptr %4, align 4
104  store <4 x i32> %1, ptr %5, align 4
105  store <4 x i32> %2, ptr %6, align 4
106  ret void
107}
108
; test5: both shufflevector instructions read from the constant
; <8 x i32> zeroinitializer rather than from an instruction or argument, and
; their <4 x i32> results are stored to %out at i32 element offsets 0 and 4.
; The CHECK lines expect everything to collapse into a single store of
; <8 x i32> zeroinitializer.
109define void @test5(ptr %out) {
110; CHECK-LABEL: @test5(
111; CHECK-NEXT:  entry:
112; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr [[OUT:%.*]], align 4
113; CHECK-NEXT:    ret void
114;
115entry:
116  %0 = shufflevector <8 x i32> zeroinitializer, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
117  %1 = shufflevector <8 x i32> zeroinitializer, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
118  %2 = getelementptr inbounds i32, ptr %out, i64 0
119  %3 = getelementptr inbounds i32, ptr %out, i64 4
120  store <4 x i32> %0, ptr %2, align 4
121  store <4 x i32> %1, ptr %3, align 4
122  ret void
123}
124
; test6: two groups of <4 x float> multiplies whose float operands are three
; loads from %in0 (%load0, %load1, %load2) reused cyclically, and whose other
; operands come from <16 x i8> loads of %in1 that are split, zero-extended to
; i16, split again and converted with uitofp nneg.
;   * Group 1: %fmul0..%fmul7, stored to %in2 at byte offsets 0..112.
;   * Group 2: %fmul8..%fmul11, stored to %in2 at byte offsets 128..176.
; The CHECK lines expect group 1 as one <32 x float> fmul and group 2 as one
; <16 x float> fmul. In both, the i8 data is converted by a single uitofp
; straight from i8 to float, and the float operand is assembled with
; shufflevector from an <8 x float> load of %in0 plus the <4 x float> load at
; byte offset 32.
125define void @test6(ptr %in0, ptr %in1, ptr %in2) {
126; CHECK-LABEL: @test6(
127; CHECK-NEXT:  entry:
128; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[IN0:%.*]], i64 32
129; CHECK-NEXT:    [[LOAD2:%.*]] = load <4 x float>, ptr [[GEP1]], align 16
130; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[IN0]], align 16
131; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr [[IN1:%.*]], align 1
132; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <32 x i8> [[TMP1]] to <32 x float>
133; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
134; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
135; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x float> [[TMP14]], <16 x float> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
136; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
137; CHECK-NEXT:    [[TMP4:%.*]] = fmul <32 x float> [[TMP3]], [[TMP2]]
138; CHECK-NEXT:    store <32 x float> [[TMP4]], ptr [[IN2:%.*]], align 16
139; CHECK-NEXT:    [[GEP10:%.*]] = getelementptr inbounds nuw i8, ptr [[IN1]], i64 32
140; CHECK-NEXT:    [[LOAD5:%.*]] = load <16 x i8>, ptr [[GEP10]], align 1
141; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
142; CHECK-NEXT:    [[GEP11:%.*]] = getelementptr inbounds nuw i8, ptr [[IN2]], i64 128
143; CHECK-NEXT:    [[TMP6:%.*]] = uitofp <16 x i8> [[LOAD5]] to <16 x float>
144; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
145; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
146; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
147; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
148; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
149; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
150; CHECK-NEXT:    [[TMP13:%.*]] = fmul <16 x float> [[TMP12]], [[TMP6]]
151; CHECK-NEXT:    store <16 x float> [[TMP13]], ptr [[GEP11]], align 16
152; CHECK-NEXT:    ret void
153;
154entry:
  ; Three adjacent <4 x float> loads from %in0 (byte offsets 0, 16, 32).
155  %gep0 = getelementptr inbounds i8, ptr %in0, i64 16
156  %gep1 = getelementptr inbounds i8, ptr %in0, i64 32
157  %load0 = load <4 x float>, ptr %in0, align 16
158  %load1 = load <4 x float>, ptr %gep0, align 16
159  %load2 = load <4 x float>, ptr %gep1, align 16
  ; Two adjacent <16 x i8> loads from %in1 (byte offsets 0, 16).
160  %gep2 = getelementptr inbounds i8, ptr %in1, i64 16
161  %load3 = load <16 x i8>, ptr %in1, align 1
162  %load4 = load <16 x i8>, ptr %gep2, align 1
  ; Split each i8 vector into its low and high 8-lane halves.
163  %shufflevector0 = shufflevector <16 x i8> %load3, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
164  %shufflevector1 = shufflevector <16 x i8> %load3, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
165  %shufflevector2 = shufflevector <16 x i8> %load4, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
166  %shufflevector3 = shufflevector <16 x i8> %load4, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
167  %zext0 = zext <8 x i8> %shufflevector0 to <8 x i16>
168  %zext1 = zext <8 x i8> %shufflevector1 to <8 x i16>
169  %zext2 = zext <8 x i8> %shufflevector2 to <8 x i16>
170  %zext3 = zext <8 x i8> %shufflevector3 to <8 x i16>
  ; Split each widened vector again into 4-lane halves, in source order.
171  %shufflevector4 = shufflevector <8 x i16> %zext0, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
172  %shufflevector5 = shufflevector <8 x i16> %zext0, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
173  %shufflevector6 = shufflevector <8 x i16> %zext1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
174  %shufflevector7 = shufflevector <8 x i16> %zext1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
175  %shufflevector8 = shufflevector <8 x i16> %zext2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
176  %shufflevector9 = shufflevector <8 x i16> %zext2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
177  %shufflevector10 = shufflevector <8 x i16> %zext3, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
178  %shufflevector11 = shufflevector <8 x i16> %zext3, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
179  %uitofp0 = uitofp nneg <4 x i16> %shufflevector4 to <4 x float>
180  %uitofp1 = uitofp nneg <4 x i16> %shufflevector5 to <4 x float>
181  %uitofp2 = uitofp nneg <4 x i16> %shufflevector6 to <4 x float>
182  %uitofp3 = uitofp nneg <4 x i16> %shufflevector7 to <4 x float>
183  %uitofp4 = uitofp nneg <4 x i16> %shufflevector8 to <4 x float>
184  %uitofp5 = uitofp nneg <4 x i16> %shufflevector9 to <4 x float>
185  %uitofp6 = uitofp nneg <4 x i16> %shufflevector10 to <4 x float>
186  %uitofp7 = uitofp nneg <4 x i16> %shufflevector11 to <4 x float>
  ; Group 1: the float operand cycles %load0, %load1, %load2, %load0, ...
187  %fmul0 = fmul <4 x float> %load0, %uitofp0
188  %fmul1 = fmul <4 x float> %load1, %uitofp1
189  %fmul2 = fmul <4 x float> %load2, %uitofp2
190  %fmul3 = fmul <4 x float> %load0, %uitofp3
191  %fmul4 = fmul <4 x float> %load1, %uitofp4
192  %fmul5 = fmul <4 x float> %load2, %uitofp5
193  %fmul6 = fmul <4 x float> %load0, %uitofp6
194  %fmul7 = fmul <4 x float> %load1, %uitofp7
  ; Eight adjacent 16-byte slots in %in2 (byte offsets 0 through 112).
195  %gep3 = getelementptr inbounds i8, ptr %in2, i64 16
196  %gep4 = getelementptr inbounds i8, ptr %in2, i64 32
197  %gep5 = getelementptr inbounds i8, ptr %in2, i64 48
198  %gep6 = getelementptr inbounds i8, ptr %in2, i64 64
199  %gep7 = getelementptr inbounds i8, ptr %in2, i64 80
200  %gep8 = getelementptr inbounds i8, ptr %in2, i64 96
201  %gep9 = getelementptr inbounds i8, ptr %in2, i64 112
202  store <4 x float> %fmul0, ptr %in2, align 16
203  store <4 x float> %fmul1, ptr %gep3, align 16
204  store <4 x float> %fmul2, ptr %gep4, align 16
205  store <4 x float> %fmul3, ptr %gep5, align 16
206  store <4 x float> %fmul4, ptr %gep6, align 16
207  store <4 x float> %fmul5, ptr %gep7, align 16
208  store <4 x float> %fmul6, ptr %gep8, align 16
209  store <4 x float> %fmul7, ptr %gep9, align 16
  ; Group 2: a third <16 x i8> load from %in1 (byte offset 32) goes through
  ; the same split / zext / split / uitofp chain.
210  %gep10 = getelementptr inbounds i8, ptr %in1, i64 32
211  %load5 = load <16 x i8>, ptr %gep10, align 1
212  %shufflevector12 = shufflevector <16 x i8> %load5, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
213  %shufflevector13 = shufflevector <16 x i8> %load5, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
214  %zext4 = zext <8 x i8> %shufflevector12 to <8 x i16>
215  %zext5 = zext <8 x i8> %shufflevector13 to <8 x i16>
216  %shufflevector14 = shufflevector <8 x i16> %zext4, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
217  %shufflevector15 = shufflevector <8 x i16> %zext4, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
218  %shufflevector16 = shufflevector <8 x i16> %zext5, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
219  %shufflevector17 = shufflevector <8 x i16> %zext5, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
220  %uitofp8 = uitofp nneg <4 x i16> %shufflevector14 to <4 x float>
221  %uitofp9 = uitofp nneg <4 x i16> %shufflevector15 to <4 x float>
222  %uitofp10 = uitofp nneg <4 x i16> %shufflevector16 to <4 x float>
223  %uitofp11 = uitofp nneg <4 x i16> %shufflevector17 to <4 x float>
  ; The float operand order here is %load2, %load0, %load1, %load2, i.e. the
  ; group-1 cycle continued from where %fmul7 left off.
224  %fmul8 = fmul <4 x float> %load2, %uitofp8
225  %fmul9 = fmul <4 x float> %load0, %uitofp9
226  %fmul10 = fmul <4 x float> %load1, %uitofp10
227  %fmul11 = fmul <4 x float> %load2, %uitofp11
  ; Four adjacent 16-byte slots in %in2 (byte offsets 128 through 176).
228  %gep11 = getelementptr inbounds i8, ptr %in2, i64 128
229  %gep12 = getelementptr inbounds i8, ptr %in2, i64 144
230  %gep13 = getelementptr inbounds i8, ptr %in2, i64 160
231  %gep14 = getelementptr inbounds i8, ptr %in2, i64 176
232  store <4 x float> %fmul8, ptr %gep11, align 16
233  store <4 x float> %fmul9, ptr %gep12, align 16
234  store <4 x float> %fmul10, ptr %gep13, align 16
235  store <4 x float> %fmul11, ptr %gep14, align 16
236  ret void
237}
238