xref: /llvm-project/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/vectorize_i8.ll (revision 2be0abb7fe72ed4537b3eabcd3102d48ea845717)
1; RUN: opt -mtriple=nvptx64-nvidia-cuda -passes=load-store-vectorizer -S -o - %s | FileCheck %s
2
3; Vectorize and emit valid code (Issue #54896).
4
; int8x3a2: three consecutive i8 slots off a base pointer declared align 2 are
; loaded and then written back in reverse order (slot 0 receives %l2, slot 2
; receives %l0).
5define void @int8x3a2(ptr nocapture align 2 %ptr) {
; Byte offsets 0, 1 and 2 from the base.
6  %ptr0 = getelementptr i8, ptr %ptr, i64 0
7  %ptr1 = getelementptr i8, ptr %ptr, i64 1
8  %ptr2 = getelementptr i8, ptr %ptr, i64 2
9
; Offsets 0 and 2 inherit the 2-byte alignment of the base; offset 1 is only
; byte-aligned.
10  %l0 = load i8, ptr %ptr0, align 2
11  %l1 = load i8, ptr %ptr1, align 1
12  %l2 = load i8, ptr %ptr2, align 2
13
; Same addresses and alignments as the loads, with the values reversed.
14  store i8 %l2, ptr %ptr0, align 2
15  store i8 %l1, ptr %ptr1, align 1
16  store i8 %l0, ptr %ptr2, align 2
17
18  ret void
19
; Expected output: one <2 x i8> access plus one scalar i8 access for the loads
; and likewise for the stores. The patterns are order-independent.
20; CHECK-LABEL: @int8x3a2
21; CHECK-DAG: load <2 x i8>
22; CHECK-DAG: load i8
23; CHECK-DAG: store <2 x i8>
24; CHECK-DAG: store i8
25}
26
; int8x3a4: three consecutive i8 slots off a base pointer declared align 4 are
; loaded and then written back in reverse order (slot 0 receives %l2, slot 2
; receives %l0).
27define void @int8x3a4(ptr nocapture align 4 %ptr) {
; Byte offsets 0, 1 and 2 from the base.
28  %ptr0 = getelementptr i8, ptr %ptr, i64 0
29  %ptr1 = getelementptr i8, ptr %ptr, i64 1
30  %ptr2 = getelementptr i8, ptr %ptr, i64 2
31
; Offset 0 inherits the 4-byte alignment of the base, offset 2 is 2-aligned,
; offset 1 is only byte-aligned.
32  %l0 = load i8, ptr %ptr0, align 4
33  %l1 = load i8, ptr %ptr1, align 1
34  %l2 = load i8, ptr %ptr2, align 2
35
; The store alignments mirror the loads: %ptr2 is base+2, so its address is
; 2 mod 4 and it must not claim more than 2-byte alignment, while %ptr0 is the
; base itself and carries the full 4-byte alignment.
36  store i8 %l2, ptr %ptr0, align 4
37  store i8 %l1, ptr %ptr1, align 1
38  store i8 %l0, ptr %ptr2, align 2
39
40  ret void
41
; Expected output, in this order: a <2 x i8> load, a scalar i8 load, a
; <2 x i8> store and a scalar i8 store.
42; CHECK-LABEL: @int8x3a4
43; CHECK: load <2 x i8>
44; CHECK: load i8
45; CHECK: store <2 x i8>
46; CHECK: store i8
47}
48
; int8x12a4: twelve consecutive i8 slots off a base pointer declared align 4
; are loaded and then written back in reverse order (slot 0 receives %lb,
; slot 11 receives %l0).
49define void @int8x12a4(ptr nocapture align 4 %ptr) {
; Byte offsets 0..11 from the base; the suffixes a and b name offsets 10 and 11.
50  %ptr0 = getelementptr i8, ptr %ptr, i64 0
51  %ptr1 = getelementptr i8, ptr %ptr, i64 1
52  %ptr2 = getelementptr i8, ptr %ptr, i64 2
53  %ptr3 = getelementptr i8, ptr %ptr, i64 3
54  %ptr4 = getelementptr i8, ptr %ptr, i64 4
55  %ptr5 = getelementptr i8, ptr %ptr, i64 5
56  %ptr6 = getelementptr i8, ptr %ptr, i64 6
57  %ptr7 = getelementptr i8, ptr %ptr, i64 7
58  %ptr8 = getelementptr i8, ptr %ptr, i64 8
59  %ptr9 = getelementptr i8, ptr %ptr, i64 9
60  %ptra = getelementptr i8, ptr %ptr, i64 10
61  %ptrb = getelementptr i8, ptr %ptr, i64 11
62
; Alignments repeat with period four (4, 1, 2, 1), matching what each offset
; can guarantee given the 4-byte-aligned base.
63  %l0 = load i8, ptr %ptr0, align 4
64  %l1 = load i8, ptr %ptr1, align 1
65  %l2 = load i8, ptr %ptr2, align 2
66  %l3 = load i8, ptr %ptr3, align 1
67  %l4 = load i8, ptr %ptr4, align 4
68  %l5 = load i8, ptr %ptr5, align 1
69  %l6 = load i8, ptr %ptr6, align 2
70  %l7 = load i8, ptr %ptr7, align 1
71  %l8 = load i8, ptr %ptr8, align 4
72  %l9 = load i8, ptr %ptr9, align 1
73  %la = load i8, ptr %ptra, align 2
74  %lb = load i8, ptr %ptrb, align 1
75
; Stores walk the addresses in ascending order while the values descend.
76  store i8 %lb, ptr %ptr0, align 4
77  store i8 %la, ptr %ptr1, align 1
78  store i8 %l9, ptr %ptr2, align 2
79  store i8 %l8, ptr %ptr3, align 1
80  store i8 %l7, ptr %ptr4, align 4
81  store i8 %l6, ptr %ptr5, align 1
82  store i8 %l5, ptr %ptr6, align 2
83  store i8 %l4, ptr %ptr7, align 1
84  store i8 %l3, ptr %ptr8, align 4
85  store i8 %l2, ptr %ptr9, align 1
86  store i8 %l1, ptr %ptra, align 2
87  store i8 %l0, ptr %ptrb, align 1
88
89  ret void
90
; Expected output, in this order: three <4 x i8> loads followed by three
; <4 x i8> stores.
91; CHECK-LABEL: @int8x12a4
92; CHECK: load <4 x i8>
93; CHECK: load <4 x i8>
94; CHECK: load <4 x i8>
95; CHECK: store <4 x i8>
96; CHECK: store <4 x i8>
97; CHECK: store <4 x i8>
98}
99
100
; int8x16a4: sixteen consecutive i8 slots off a base pointer declared align 4
; are loaded and then written back permuted (see the store group below).
101define void @int8x16a4(ptr nocapture align 4 %ptr) {
; Byte offsets 0..15 from the base; the suffixes a..f name offsets 10..15.
102  %ptr0 = getelementptr i8, ptr %ptr, i64 0
103  %ptr1 = getelementptr i8, ptr %ptr, i64 1
104  %ptr2 = getelementptr i8, ptr %ptr, i64 2
105  %ptr3 = getelementptr i8, ptr %ptr, i64 3
106  %ptr4 = getelementptr i8, ptr %ptr, i64 4
107  %ptr5 = getelementptr i8, ptr %ptr, i64 5
108  %ptr6 = getelementptr i8, ptr %ptr, i64 6
109  %ptr7 = getelementptr i8, ptr %ptr, i64 7
110  %ptr8 = getelementptr i8, ptr %ptr, i64 8
111  %ptr9 = getelementptr i8, ptr %ptr, i64 9
112  %ptra = getelementptr i8, ptr %ptr, i64 10
113  %ptrb = getelementptr i8, ptr %ptr, i64 11
114  %ptrc = getelementptr i8, ptr %ptr, i64 12
115  %ptrd = getelementptr i8, ptr %ptr, i64 13
116  %ptre = getelementptr i8, ptr %ptr, i64 14
117  %ptrf = getelementptr i8, ptr %ptr, i64 15
118
; Alignments repeat with period four (4, 1, 2, 1), matching what each offset
; can guarantee given the 4-byte-aligned base.
119  %l0 = load i8, ptr %ptr0, align 4
120  %l1 = load i8, ptr %ptr1, align 1
121  %l2 = load i8, ptr %ptr2, align 2
122  %l3 = load i8, ptr %ptr3, align 1
123  %l4 = load i8, ptr %ptr4, align 4
124  %l5 = load i8, ptr %ptr5, align 1
125  %l6 = load i8, ptr %ptr6, align 2
126  %l7 = load i8, ptr %ptr7, align 1
127  %l8 = load i8, ptr %ptr8, align 4
128  %l9 = load i8, ptr %ptr9, align 1
129  %la = load i8, ptr %ptra, align 2
130  %lb = load i8, ptr %ptrb, align 1
131  %lc = load i8, ptr %ptrc, align 4
132  %ld = load i8, ptr %ptrd, align 1
133  %le = load i8, ptr %ptre, align 2
134  %lf = load i8, ptr %ptrf, align 1
135
; The stores do not start at offset 0: the first four write %lf..%lc to
; offsets 12..15, and the remaining twelve write %lb..%l0 to offsets 0..11.
136  store i8 %lf, ptr %ptrc, align 4
137  store i8 %le, ptr %ptrd, align 1
138  store i8 %ld, ptr %ptre, align 2
139  store i8 %lc, ptr %ptrf, align 1
140  store i8 %lb, ptr %ptr0, align 4
141  store i8 %la, ptr %ptr1, align 1
142  store i8 %l9, ptr %ptr2, align 2
143  store i8 %l8, ptr %ptr3, align 1
144  store i8 %l7, ptr %ptr4, align 4
145  store i8 %l6, ptr %ptr5, align 1
146  store i8 %l5, ptr %ptr6, align 2
147  store i8 %l4, ptr %ptr7, align 1
148  store i8 %l3, ptr %ptr8, align 4
149  store i8 %l2, ptr %ptr9, align 1
150  store i8 %l1, ptr %ptra, align 2
151  store i8 %l0, ptr %ptrb, align 1
152
153  ret void
154
; Expected output, in this order: four <4 x i8> loads followed by four
; <4 x i8> stores.
155; CHECK-LABEL: @int8x16a4
156; CHECK: load <4 x i8>
157; CHECK: load <4 x i8>
158; CHECK: load <4 x i8>
159; CHECK: load <4 x i8>
160; CHECK: store <4 x i8>
161; CHECK: store <4 x i8>
162; CHECK: store <4 x i8>
163; CHECK: store <4 x i8>
164}
165
; int8x8a8: eight consecutive i8 slots off a base pointer declared align 8 are
; loaded and then written back in reverse order (slot 0 receives %l7, slot 7
; receives %l0).
166define void @int8x8a8(ptr nocapture align 8 %ptr) {
; Byte offsets 0..7 from the base.
167  %ptr0 = getelementptr i8, ptr %ptr, i64 0
168  %ptr1 = getelementptr i8, ptr %ptr, i64 1
169  %ptr2 = getelementptr i8, ptr %ptr, i64 2
170  %ptr3 = getelementptr i8, ptr %ptr, i64 3
171  %ptr4 = getelementptr i8, ptr %ptr, i64 4
172  %ptr5 = getelementptr i8, ptr %ptr, i64 5
173  %ptr6 = getelementptr i8, ptr %ptr, i64 6
174  %ptr7 = getelementptr i8, ptr %ptr, i64 7
175
; Each access states the alignment its offset can guarantee given the
; 8-byte-aligned base: 8 at offset 0, 4 at offset 4, 2 at offsets 2 and 6,
; 1 at the odd offsets.
176  %l0 = load i8, ptr %ptr0, align 8
177  %l1 = load i8, ptr %ptr1, align 1
178  %l2 = load i8, ptr %ptr2, align 2
179  %l3 = load i8, ptr %ptr3, align 1
180  %l4 = load i8, ptr %ptr4, align 4
181  %l5 = load i8, ptr %ptr5, align 1
182  %l6 = load i8, ptr %ptr6, align 2
183  %l7 = load i8, ptr %ptr7, align 1
184
; Stores walk the addresses in ascending order while the values descend.
185  store i8 %l7, ptr %ptr0, align 8
186  store i8 %l6, ptr %ptr1, align 1
187  store i8 %l5, ptr %ptr2, align 2
188  store i8 %l4, ptr %ptr3, align 1
189  store i8 %l3, ptr %ptr4, align 4
190  store i8 %l2, ptr %ptr5, align 1
191  store i8 %l1, ptr %ptr6, align 2
192  store i8 %l0, ptr %ptr7, align 1
193
194  ret void
195
; Expected output: a single <8 x i8> load followed by a single <8 x i8> store.
196; CHECK-LABEL: @int8x8a8
197; CHECK: load <8 x i8>
198; CHECK: store <8 x i8>
199}
200
; int8x12a8: twelve consecutive i8 slots off a base pointer declared align 8
; are loaded and then written back in reverse order (slot 0 receives %lb,
; slot 11 receives %l0).
201define void @int8x12a8(ptr nocapture align 8 %ptr) {
; Byte offsets 0..11 from the base; the suffixes a and b name offsets 10 and 11.
202  %ptr0 = getelementptr i8, ptr %ptr, i64 0
203  %ptr1 = getelementptr i8, ptr %ptr, i64 1
204  %ptr2 = getelementptr i8, ptr %ptr, i64 2
205  %ptr3 = getelementptr i8, ptr %ptr, i64 3
206  %ptr4 = getelementptr i8, ptr %ptr, i64 4
207  %ptr5 = getelementptr i8, ptr %ptr, i64 5
208  %ptr6 = getelementptr i8, ptr %ptr, i64 6
209  %ptr7 = getelementptr i8, ptr %ptr, i64 7
210  %ptr8 = getelementptr i8, ptr %ptr, i64 8
211  %ptr9 = getelementptr i8, ptr %ptr, i64 9
212  %ptra = getelementptr i8, ptr %ptr, i64 10
213  %ptrb = getelementptr i8, ptr %ptr, i64 11
214
; Each access states the alignment its offset can guarantee given the
; 8-byte-aligned base: 8 at offsets 0 and 8, 4 at offset 4, 2 at the other
; even offsets, 1 at the odd offsets.
215  %l0 = load i8, ptr %ptr0, align 8
216  %l1 = load i8, ptr %ptr1, align 1
217  %l2 = load i8, ptr %ptr2, align 2
218  %l3 = load i8, ptr %ptr3, align 1
219  %l4 = load i8, ptr %ptr4, align 4
220  %l5 = load i8, ptr %ptr5, align 1
221  %l6 = load i8, ptr %ptr6, align 2
222  %l7 = load i8, ptr %ptr7, align 1
223  %l8 = load i8, ptr %ptr8, align 8
224  %l9 = load i8, ptr %ptr9, align 1
225  %la = load i8, ptr %ptra, align 2
226  %lb = load i8, ptr %ptrb, align 1
227
; Stores walk the addresses in ascending order while the values descend.
228  store i8 %lb, ptr %ptr0, align 8
229  store i8 %la, ptr %ptr1, align 1
230  store i8 %l9, ptr %ptr2, align 2
231  store i8 %l8, ptr %ptr3, align 1
232  store i8 %l7, ptr %ptr4, align 4
233  store i8 %l6, ptr %ptr5, align 1
234  store i8 %l5, ptr %ptr6, align 2
235  store i8 %l4, ptr %ptr7, align 1
236  store i8 %l3, ptr %ptr8, align 8
237  store i8 %l2, ptr %ptr9, align 1
238  store i8 %l1, ptr %ptra, align 2
239  store i8 %l0, ptr %ptrb, align 1
240
241  ret void
242
; Expected output: one <8 x i8> and one <4 x i8> access for the loads, and
; likewise for the stores. The patterns are order-independent.
243; CHECK-LABEL: @int8x12a8
244; CHECK-DAG: load <8 x i8>
245; CHECK-DAG: load <4 x i8>
246; CHECK-DAG: store <8 x i8>
247; CHECK-DAG: store <4 x i8>
248}
249
250
; int8x16a8: sixteen consecutive i8 slots off a base pointer declared align 8
; are loaded and then written back in reverse order (slot 0 receives %lf,
; slot 15 receives %l0).
251define void @int8x16a8(ptr nocapture align 8 %ptr) {
; Byte offsets 0..15 from the base; the suffixes a..f name offsets 10..15.
252  %ptr0 = getelementptr i8, ptr %ptr, i64 0
253  %ptr1 = getelementptr i8, ptr %ptr, i64 1
254  %ptr2 = getelementptr i8, ptr %ptr, i64 2
255  %ptr3 = getelementptr i8, ptr %ptr, i64 3
256  %ptr4 = getelementptr i8, ptr %ptr, i64 4
257  %ptr5 = getelementptr i8, ptr %ptr, i64 5
258  %ptr6 = getelementptr i8, ptr %ptr, i64 6
259  %ptr7 = getelementptr i8, ptr %ptr, i64 7
260  %ptr8 = getelementptr i8, ptr %ptr, i64 8
261  %ptr9 = getelementptr i8, ptr %ptr, i64 9
262  %ptra = getelementptr i8, ptr %ptr, i64 10
263  %ptrb = getelementptr i8, ptr %ptr, i64 11
264  %ptrc = getelementptr i8, ptr %ptr, i64 12
265  %ptrd = getelementptr i8, ptr %ptr, i64 13
266  %ptre = getelementptr i8, ptr %ptr, i64 14
267  %ptrf = getelementptr i8, ptr %ptr, i64 15
268
; Each access states the alignment its offset can guarantee given the
; 8-byte-aligned base: 8 at offsets 0 and 8, 4 at offsets 4 and 12, 2 at the
; other even offsets, 1 at the odd offsets.
269  %l0 = load i8, ptr %ptr0, align 8
270  %l1 = load i8, ptr %ptr1, align 1
271  %l2 = load i8, ptr %ptr2, align 2
272  %l3 = load i8, ptr %ptr3, align 1
273  %l4 = load i8, ptr %ptr4, align 4
274  %l5 = load i8, ptr %ptr5, align 1
275  %l6 = load i8, ptr %ptr6, align 2
276  %l7 = load i8, ptr %ptr7, align 1
277  %l8 = load i8, ptr %ptr8, align 8
278  %l9 = load i8, ptr %ptr9, align 1
279  %la = load i8, ptr %ptra, align 2
280  %lb = load i8, ptr %ptrb, align 1
281  %lc = load i8, ptr %ptrc, align 4
282  %ld = load i8, ptr %ptrd, align 1
283  %le = load i8, ptr %ptre, align 2
284  %lf = load i8, ptr %ptrf, align 1
285
; Stores walk the addresses in ascending order while the values descend.
286  store i8 %lf, ptr %ptr0, align 8
287  store i8 %le, ptr %ptr1, align 1
288  store i8 %ld, ptr %ptr2, align 2
289  store i8 %lc, ptr %ptr3, align 1
290  store i8 %lb, ptr %ptr4, align 4
291  store i8 %la, ptr %ptr5, align 1
292  store i8 %l9, ptr %ptr6, align 2
293  store i8 %l8, ptr %ptr7, align 1
294  store i8 %l7, ptr %ptr8, align 8
295  store i8 %l6, ptr %ptr9, align 1
296  store i8 %l5, ptr %ptra, align 2
297  store i8 %l4, ptr %ptrb, align 1
298  store i8 %l3, ptr %ptrc, align 4
299  store i8 %l2, ptr %ptrd, align 1
300  store i8 %l1, ptr %ptre, align 2
301  store i8 %l0, ptr %ptrf, align 1
302
303  ret void
304
; Expected output, in this order: two <8 x i8> loads followed by two
; <8 x i8> stores.
305; CHECK-LABEL: @int8x16a8
306; CHECK: load <8 x i8>
307; CHECK: load <8 x i8>
308; CHECK: store <8 x i8>
309; CHECK: store <8 x i8>
310}
311