xref: /llvm-project/llvm/test/Transforms/LoopVectorize/struct-return.ll (revision f88ef1bd1bd6ea27237d2abd03b8955e550f97c1)
1; RUN: opt < %s -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S -pass-remarks-analysis=loop-vectorize 2>%t | FileCheck %s
2; RUN: cat %t | FileCheck --check-prefix=CHECK-REMARKS %s
3
4target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
5
6; Tests basic vectorization of homogeneous struct literal returns.
7
; Loop over 1024 floats: each input element is passed to @foo, and the two
; fields of the returned { float, float } are stored to %out_a and %out_b.
; The call carries attribute group #0, which maps @foo to @fixed_vec_foo.
; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_f32_widen
; CHECK-NOT:   vector.body:
entry:
  br label %loop

loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  ; Address computations for the current element.
  %src = getelementptr inbounds float, ptr %in, i64 %idx
  %dst_a = getelementptr inbounds float, ptr %out_a, i64 %idx
  %dst_b = getelementptr inbounds float, ptr %out_b, i64 %idx
  %x = load float, ptr %src, align 4
  %res = tail call { float, float } @foo(float %x) #0
  ; Unpack both struct fields and write each to its own output array.
  %res.0 = extractvalue { float, float } %res, 0
  %res.1 = extractvalue { float, float } %res, 1
  store float %res.0, ptr %dst_a, align 4
  store float %res.1, ptr %dst_b, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
34
; Loop over 1024 doubles: each input element is passed to @bar, and the two
; fields of the returned { double, double } are stored to %out_a and %out_b.
; The call carries attribute group #1, which maps @bar to @fixed_vec_bar.
; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_f64_widen
; CHECK-NOT:   vector.body:
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Address computations for the current element.
  %in.addr = getelementptr inbounds double, ptr %in, i64 %i
  %a.addr = getelementptr inbounds double, ptr %out_a, i64 %i
  %b.addr = getelementptr inbounds double, ptr %out_b, i64 %i
  %val = load double, ptr %in.addr, align 8
  %pair = tail call { double, double } @bar(double %val) #1
  ; Unpack both struct fields and write each to its own output array.
  %pair.a = extractvalue { double, double } %pair, 0
  %pair.b = extractvalue { double, double } %pair, 1
  store double %pair.a, ptr %a.addr, align 8
  store double %pair.b, ptr %b.addr, align 8
  %i.next = add nuw nsw i64 %i, 1
  %finished = icmp eq i64 %i.next, 1024
  br i1 %finished, label %exit, label %loop

exit:
  ret void
}
61
; Same float loop shape as the widening test, but the call to @foo carries
; attribute group #3, whose only mapping is to @scalable_vec_masked_foo.
; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
define void @struct_return_f32_replicate(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_f32_replicate
; CHECK-NOT:   vector.body:
entry:
  br label %loop

loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src = getelementptr inbounds float, ptr %in, i64 %idx
  %dst_a = getelementptr inbounds float, ptr %out_a, i64 %idx
  %dst_b = getelementptr inbounds float, ptr %out_b, i64 %idx
  %x = load float, ptr %src, align 4
  ; With no fixed-size vector mapping in #3, the call is expected to be
  ; replicated rather than widened.
  %res = tail call { float, float } @foo(float %x) #3
  %res.0 = extractvalue { float, float } %res, 0
  %res.1 = extractvalue { float, float } %res, 1
  store float %res.0, ptr %dst_a, align 4
  store float %res.1, ptr %dst_b, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
89
; Variant of the float widening test whose pointer arguments are NOT marked
; noalias (per the function name, this is the runtime-checks flavour).
; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, ptr writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_f32_widen_rt_checks
; CHECK-NOT:   vector.body:
entry:
  br label %loop

loop:
  %n = phi i64 [ 0, %entry ], [ %n.next, %loop ]
  ; Address computations for the current element.
  %in.ptr = getelementptr inbounds float, ptr %in, i64 %n
  %a.ptr = getelementptr inbounds float, ptr %out_a, i64 %n
  %b.ptr = getelementptr inbounds float, ptr %out_b, i64 %n
  %elt = load float, ptr %in.ptr, align 4
  %ret = tail call { float, float } @foo(float %elt) #0
  ; Unpack both struct fields and write each to its own output array.
  %ret.a = extractvalue { float, float } %ret, 0
  %ret.b = extractvalue { float, float } %ret, 1
  store float %ret.a, ptr %a.ptr, align 4
  store float %ret.b, ptr %b.ptr, align 4
  %n.next = add nuw nsw i64 %n, 1
  %at.end = icmp eq i64 %n.next, 1024
  br i1 %at.end, label %exit, label %loop

exit:
  ret void
}
116
; Loop over 1024 i32 values: each element is added to itself with
; @llvm.sadd.with.overflow.i32, which returns the mixed-type struct
; { i32, i1 }. The sum is stored to %out_a (i32 elements) and the
; zero-extended overflow bit to %out_b (i8 elements).
; TODO: Allow mixed-struct type vectorization and mark overflow intrinsics as trivially vectorizable.
; CHECK-REMARKS:         remark: {{.*}} loop not vectorized: call instruction cannot be vectorized
define void @test_overflow_intrinsic(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @test_overflow_intrinsic
; CHECK-NOT:   vector.body:
; CHECK-NOT:   @llvm.sadd.with.overflow.v{{.+}}i32
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ; Index the input with the same element type that is loaded from it.
  %arrayidx = getelementptr inbounds i32, ptr %in, i64 %iv
  %in_val = load i32, ptr %arrayidx, align 4
  %call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %in_val, i32 %in_val)
  %extract_ret = extractvalue { i32, i1 } %call, 0
  %extract_overflow = extractvalue { i32, i1 } %call, 1
  %zext_overflow = zext i1 %extract_overflow to i8
  %arrayidx2 = getelementptr inbounds i32, ptr %out_a, i64 %iv
  store i32 %extract_ret, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds i8, ptr %out_b, i64 %iv
  ; %out_b advances one byte per iteration, so only byte alignment can be
  ; promised for this store.
  store i8 %zext_overflow, ptr %arrayidx4, align 1
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}
145
; Loop over 1024 i32 values: @qux returns a three-field { i32, i32, i32 },
; of which only field 0 is extracted and stored to %out_a. The call carries
; attribute group #5, which maps @qux to @fixed_vec_qux.
; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
define void @struct_return_i32_three_results_widen(ptr noalias %in, ptr noalias writeonly %out_a) {
; CHECK-LABEL: define void @struct_return_i32_three_results_widen
; CHECK-NOT:   vector.body:
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds i32, ptr %in, i64 %i
  %dst = getelementptr inbounds i32, ptr %out_a, i64 %i
  %x = load i32, ptr %src, align 4
  %triple = tail call { i32, i32, i32 } @qux(i32 %x) #5
  ; Fields 1 and 2 of the result are intentionally left unused.
  %first = extractvalue { i32, i32, i32 } %triple, 0
  store i32 %first, ptr %dst, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
169
; Negative test. Widening structs of vectors is not supported.
; The loop loads a <1 x float>, passes it to a callee returning
; { <1 x float>, <1 x float> }, and stores both fields.
; NOTE(review): the <1 x float> load is itself a non-widenable type, so the
; remark below may be triggered by the load rather than by the call; confirm.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_struct_of_vectors(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @negative_struct_of_vectors
; CHECK-NOT:   vector.body:
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load <1 x float>, ptr %arrayidx, align 4
  ; Call the declaration whose signature actually takes and returns
  ; <1 x float> values (@foo is declared as { float, float } (float)).
  %call = tail call { <1 x float>, <1 x float> } @foo_vectors(<1 x float> %in_val) #0
  %extract_a = extractvalue { <1 x float>, <1 x float> } %call, 0
  %extract_b = extractvalue { <1 x float>, <1 x float> } %call, 1
  %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
  store <1 x float> %extract_a, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
  store <1 x float> %extract_b, ptr %arrayidx4, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}
196
; Negative test. Widening structs with mixed element types is not supported.
; @baz returns { float, i32 }; the float field is stored to %out_a and the
; i32 field to %out_b. The call carries attribute group #2, which maps @baz
; to @fixed_vec_baz, so the vector variant must not appear in the output.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_mixed_element_type_struct_return(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @negative_mixed_element_type_struct_return
; CHECK-NOT:   vector.body:
; CHECK-NOT:   call {{.*}} @fixed_vec_baz
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load float, ptr %arrayidx, align 4
  %call = tail call { float, i32 } @baz(float %in_val) #2
  %extract_a = extractvalue { float, i32 } %call, 0
  %extract_b = extractvalue { float, i32 } %call, 1
  %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
  store float %extract_a, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds i32, ptr %out_b, i64 %iv
  store i32 %extract_b, ptr %arrayidx4, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}
224
; Named (non-literal) struct type used as the return type of @bar_named.
%named_struct = type { double, double }

; Negative test. Widening non-literal structs is not supported.
; @bar_named returns %named_struct; both double fields are stored to the
; output arrays. The call carries attribute group #4, which maps @bar_named
; to @fixed_vec_bar, so the vector variant must not appear in the output.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_named_struct_return(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @negative_named_struct_return
; CHECK-NOT:   vector.body:
; CHECK-NOT:   call {{.*}} @fixed_vec_bar
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv
  %in_val = load double, ptr %arrayidx, align 8
  %call = tail call %named_struct @bar_named(double %in_val) #4
  %extract_a = extractvalue %named_struct %call, 0
  %extract_b = extractvalue %named_struct %call, 1
  %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv
  store double %extract_a, ptr %arrayidx2, align 8
  %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv
  store double %extract_b, ptr %arrayidx4, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}
254
; Negative test. Nested homogeneous structs are not supported.
; @foo_nested_struct returns { { float, float } }; the inner struct is
; extracted first and its two floats are stored to %out_a and %out_b.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_nested_struct(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @negative_nested_struct
; CHECK-NOT:   vector.body:
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load float, ptr %arrayidx, align 4
  ; The variant mapping is spelled inline so that it names this callee
  ; (attribute group #0 names scalar @foo instead).
  ; NOTE(review): presumably a call with no applicable mapping is rejected as
  ; a non-vectorizable call before its return type is examined, which would
  ; produce a different remark; confirm by running the test.
  %call = tail call { { float, float } } @foo_nested_struct(float %in_val) nounwind "vector-function-abi-variant"="_ZGVnN2v_foo_nested_struct(fixed_vec_foo)"
  %extract_inner = extractvalue { { float, float } } %call, 0
  %extract_a = extractvalue { float, float } %extract_inner, 0
  %extract_b = extractvalue { float, float } %extract_inner, 1
  %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
  store float %extract_a, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
  store float %extract_b, ptr %arrayidx4, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}
282
; Negative test. The second element of the struct cannot be widened.
; @foo_one_non_widenable_element returns { float, [1 x float] }; only the
; leading float field is extracted and stored to %out_a.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_non_widenable_element(ptr noalias %in, ptr noalias writeonly %out_a) {
; CHECK-LABEL: define void @negative_non_widenable_element
; CHECK-NOT:   vector.body:
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load float, ptr %arrayidx, align 4
  ; The variant mapping is spelled inline so that it names this callee
  ; (attribute group #0 names scalar @foo instead).
  ; NOTE(review): presumably a call with no applicable mapping is rejected as
  ; a non-vectorizable call before its return type is examined, which would
  ; produce a different remark; confirm by running the test.
  %call = tail call { float, [1 x float] } @foo_one_non_widenable_element(float %in_val) nounwind "vector-function-abi-variant"="_ZGVnN2v_foo_one_non_widenable_element(fixed_vec_foo)"
  %extract_a = extractvalue { float, [1 x float] } %call, 0
  %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
  store float %extract_a, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}
306
; Negative test. Homogeneous structs of arrays are not supported.
; @foo_arrays returns { [2 x float] }; the array is extracted first and its
; two floats are stored to %out_a and %out_b.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_struct_array_elements(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @negative_struct_array_elements
; CHECK-NOT:   vector.body:
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load float, ptr %arrayidx, align 4
  ; The variant mapping is spelled inline so that it names this callee
  ; (attribute group #0 names scalar @foo instead).
  ; NOTE(review): presumably a call with no applicable mapping is rejected as
  ; a non-vectorizable call before its return type is examined, which would
  ; produce a different remark; confirm by running the test.
  %call = tail call { [2 x float] } @foo_arrays(float %in_val) nounwind "vector-function-abi-variant"="_ZGVnN2v_foo_arrays(fixed_vec_foo)"
  %extract_inner = extractvalue { [2 x float] } %call, 0
  %extract_a = extractvalue [2 x float] %extract_inner, 0
  %extract_b = extractvalue [2 x float] %extract_inner, 1
  %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
  store float %extract_a, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
  store float %extract_b, ptr %arrayidx4, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}
334
; Negative test. Widening struct loads is not supported.
; Here the { float, float } value comes from a load rather than a call; its
; two fields are then stored to %out_a and %out_b.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_struct_load(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @negative_struct_load
; CHECK-NOT:   vector.body:
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; The input is indexed in whole-struct units.
  %pair.addr = getelementptr inbounds { float, float }, ptr %in, i64 %i
  %a.addr = getelementptr inbounds float, ptr %out_a, i64 %i
  %b.addr = getelementptr inbounds float, ptr %out_b, i64 %i
  %pair = load { float, float }, ptr %pair.addr, align 8
  %pair.a = extractvalue { float, float } %pair, 0
  %pair.b = extractvalue { float, float } %pair, 1
  store float %pair.a, ptr %a.addr, align 4
  store float %pair.b, ptr %b.addr, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
360
; Negative test. Widening struct stores is not supported.
; The { float, float } returned by @foo is stored to %out as a whole struct
; instead of being unpacked with extractvalue.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_struct_return_store_struct(ptr noalias %in, ptr noalias writeonly %out) {
; CHECK-LABEL: define void @negative_struct_return_store_struct
; CHECK-NOT:   vector.body:
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Both input and output are indexed in whole-struct units; only a single
  ; float is read from each input slot.
  %src = getelementptr inbounds { float, float }, ptr %in, i64 %i
  %dst = getelementptr inbounds { float, float }, ptr %out, i64 %i
  %x = load float, ptr %src, align 4
  %pair = tail call { float, float } @foo(float %x) #0
  store { float, float } %pair, ptr %dst, align 8
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
383
; Scalar callees returning literal structs that the tests treat as widenable.
declare { float, float } @foo(float)
declare { double, double } @bar(double)
declare { i32, i32, i32 } @qux(i32)

; Scalar callees whose return types the negative tests treat as unsupported.
declare { float, i32 } @baz(float)
declare %named_struct @bar_named(double)
declare { { float, float } } @foo_nested_struct(float)
declare { [2 x float] } @foo_arrays(float)
declare { float, [1 x float] } @foo_one_non_widenable_element(float)
declare { <1 x float>, <1 x float> } @foo_vectors(<1 x float>)

; Fixed-width (two-lane) vector variants named by the attribute groups below.
declare { <2 x float>, <2 x float> } @fixed_vec_foo(<2 x float>)
declare { <2 x double>, <2 x double> } @fixed_vec_bar(<2 x double>)
declare { <2 x float>, <2 x i32> } @fixed_vec_baz(<2 x float>)
declare { <2 x i32>, <2 x i32>, <2 x i32> } @fixed_vec_qux(<2 x i32>)

; Scalable variant of @foo that takes an extra <vscale x 4 x i1> argument
; (presumably the lane mask, per its name).
declare { <vscale x 4 x float>, <vscale x 4 x float> } @scalable_vec_masked_foo(<vscale x 4 x float>, <vscale x 4 x i1>)

; Call-site attribute groups. Each "vector-function-abi-variant" string has the
; form <mangled-variant-name>(<vector-function>), tying a scalar callee to one
; of the vector declarations above.
; @foo -> @fixed_vec_foo
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_foo(fixed_vec_foo)" }
; @bar -> @fixed_vec_bar
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_bar(fixed_vec_bar)" }
; @baz -> @fixed_vec_baz
attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_baz(fixed_vec_baz)" }
; @foo -> @scalable_vec_masked_foo (no fixed-size mapping)
attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" }
; @bar_named -> @fixed_vec_bar
attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_bar_named(fixed_vec_bar)" }
; @qux -> @fixed_vec_qux
attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_qux(fixed_vec_qux)" }
407