xref: /llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll (revision 2fab927546b34f5af7770541a9bbb974d9818c5c)
1; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-iphones -force-vector-width=4 -force-vector-interleave=1 %s -S | FileCheck %s
2
3; Vectors with i4 elements may not be legal with nontemporal stores.
4define void @test_i4_store(ptr %ddst) {
5; CHECK-LABEL: define void @test_i4_store(
6; CHECK-NOT:   vector.body:
7; CHECK:        ret void
8; One nontemporal i4 store per iteration; the directives above expect the loop to stay scalar.
9entry:
10  br label %loop
11
12loop:                                             ; preds = %entry, %loop
13  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
14  %dst.cur = phi ptr [ %ddst, %entry ], [ %dst.next, %loop ]
15  %dst.next = getelementptr inbounds i4, ptr %dst.cur, i64 1
16  store i4 10, ptr %dst.cur, align 4, !nontemporal !8
17  %iv.next = add nuw nsw i32 %iv, 4
18  %more = icmp ult i32 %iv, 4092
19  br i1 %more, label %loop, label %exit
20
21exit:                                             ; preds = %loop
22  ret void
23}
24
25define void @test_i8_store(ptr %ddst) {
26; CHECK-LABEL: define void @test_i8_store(
27; CHECK-LABEL: vector.body:
28; CHECK:         store <4 x i8> {{.*}} !nontemporal !0
29; CHECK:         br
30; One nontemporal i8 store per iteration; expected to become a <4 x i8> nontemporal store.
31entry:
32  br label %loop
33
34loop:                                             ; preds = %entry, %loop
35  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
36  %dst.cur = phi ptr [ %ddst, %entry ], [ %dst.next, %loop ]
37  %dst.next = getelementptr inbounds i8, ptr %dst.cur, i64 1
38  store i8 10, ptr %dst.cur, align 4, !nontemporal !8
39  %iv.next = add nuw nsw i32 %iv, 4
40  %more = icmp ult i32 %iv, 4092
41  br i1 %more, label %loop, label %exit
42
43exit:                                             ; preds = %loop
44  ret void
45}
46
47define void @test_half_store(ptr %ddst) {
48; CHECK-LABEL: define void @test_half_store(
49; CHECK-LABEL: vector.body:
50; CHECK:         store <4 x half> {{.*}} !nontemporal !0
51; CHECK:         br
52; One nontemporal half store per iteration; expected to become a <4 x half> nontemporal store.
53entry:
54  br label %loop
55
56loop:                                             ; preds = %entry, %loop
57  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
58  %dst.cur = phi ptr [ %ddst, %entry ], [ %dst.next, %loop ]
59  %dst.next = getelementptr inbounds half, ptr %dst.cur, i64 1
60  store half 10.0, ptr %dst.cur, align 4, !nontemporal !8
61  %iv.next = add nuw nsw i32 %iv, 4
62  %more = icmp ult i32 %iv, 4092
63  br i1 %more, label %loop, label %exit
64
65exit:                                             ; preds = %loop
66  ret void
67}
68
69define void @test_i16_store(ptr %ddst) {
70; CHECK-LABEL: define void @test_i16_store(
71; CHECK-LABEL: vector.body:
72; CHECK:         store <4 x i16> {{.*}} !nontemporal !0
73; CHECK:         br
74; One nontemporal i16 store per iteration; expected to become a <4 x i16> nontemporal store.
75entry:
76  br label %loop
77
78loop:                                             ; preds = %entry, %loop
79  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
80  %dst.cur = phi ptr [ %ddst, %entry ], [ %dst.next, %loop ]
81  %dst.next = getelementptr inbounds i16, ptr %dst.cur, i64 1
82  store i16 10, ptr %dst.cur, align 4, !nontemporal !8
83  %iv.next = add nuw nsw i32 %iv, 4
84  %more = icmp ult i32 %iv, 4092
85  br i1 %more, label %loop, label %exit
86
87exit:                                             ; preds = %loop
88  ret void
89}
90
91define void @test_i32_store(ptr nocapture %ddst) {
92; CHECK-LABEL: define void @test_i32_store(
93; CHECK-LABEL: vector.body:
94; CHECK:         store <16 x i32> {{.*}} !nontemporal !0
95; CHECK:         br
96; Four adjacent nontemporal i32 stores per iteration; expected to be emitted as one <16 x i32> nontemporal store.
97entry:
98  br label %loop
99
100loop:                                             ; preds = %entry, %loop
101  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
102  %dst.cur = phi ptr [ %ddst, %entry ], [ %dst.next, %loop ]
103  %slot1 = getelementptr inbounds i32, ptr %dst.cur, i64 1
104  store i32 10, ptr %dst.cur, align 4, !nontemporal !8
105  %slot2 = getelementptr inbounds i32, ptr %dst.cur, i64 2
106  store i32 20, ptr %slot1, align 4, !nontemporal !8
107  %slot3 = getelementptr inbounds i32, ptr %dst.cur, i64 3
108  store i32 30, ptr %slot2, align 4, !nontemporal !8
109  %dst.next = getelementptr inbounds i32, ptr %dst.cur, i64 4
110  store i32 40, ptr %slot3, align 4, !nontemporal !8
111  %iv.next = add nuw nsw i32 %iv, 4
112  %more = icmp ult i32 %iv, 4092
113  br i1 %more, label %loop, label %exit
114
115exit:                                             ; preds = %loop
116  ret void
117}
118
119define void @test_i33_store(ptr nocapture %ddst) {
120; CHECK-LABEL: define void @test_i33_store(
121; CHECK-NOT:   vector.body:
122; CHECK:         ret
123; Four adjacent nontemporal i33 stores per iteration; the directives above expect the loop to stay scalar.
124entry:
125  br label %loop
126
127loop:                                             ; preds = %entry, %loop
128  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
129  %dst.cur = phi ptr [ %ddst, %entry ], [ %dst.next, %loop ]
130  %slot1 = getelementptr inbounds i33, ptr %dst.cur, i64 1
131  store i33 10, ptr %dst.cur, align 4, !nontemporal !8
132  %slot2 = getelementptr inbounds i33, ptr %dst.cur, i64 2
133  store i33 20, ptr %slot1, align 4, !nontemporal !8
134  %slot3 = getelementptr inbounds i33, ptr %dst.cur, i64 3
135  store i33 30, ptr %slot2, align 4, !nontemporal !8
136  %dst.next = getelementptr inbounds i33, ptr %dst.cur, i64 4
137  store i33 40, ptr %slot3, align 4, !nontemporal !8
138  %iv.next = add nuw nsw i32 %iv, 3
139  %more = icmp ult i32 %iv, 4092
140  br i1 %more, label %loop, label %exit
141
142exit:                                             ; preds = %loop
143  ret void
144}
145
146define void @test_i40_store(ptr nocapture %ddst) {
147; CHECK-LABEL: define void @test_i40_store(
148; CHECK-NOT:   vector.body:
149; CHECK:         ret
150; Four adjacent nontemporal i40 stores per iteration; the directives above expect the loop to stay scalar.
151entry:
152  br label %loop
153
154loop:                                             ; preds = %entry, %loop
155  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
156  %dst.cur = phi ptr [ %ddst, %entry ], [ %dst.next, %loop ]
157  %slot1 = getelementptr inbounds i40, ptr %dst.cur, i64 1
158  store i40 10, ptr %dst.cur, align 4, !nontemporal !8
159  %slot2 = getelementptr inbounds i40, ptr %dst.cur, i64 2
160  store i40 20, ptr %slot1, align 4, !nontemporal !8
161  %slot3 = getelementptr inbounds i40, ptr %dst.cur, i64 3
162  store i40 30, ptr %slot2, align 4, !nontemporal !8
163  %dst.next = getelementptr inbounds i40, ptr %dst.cur, i64 4
164  store i40 40, ptr %slot3, align 4, !nontemporal !8
165  %iv.next = add nuw nsw i32 %iv, 3
166  %more = icmp ult i32 %iv, 4092
167  br i1 %more, label %loop, label %exit
168
169exit:                                             ; preds = %loop
170  ret void
171}
172define void @test_i64_store(ptr nocapture %ddst) local_unnamed_addr #0 {
173; CHECK-LABEL: define void @test_i64_store(
174; CHECK-LABEL: vector.body:
175; CHECK:         store <4 x i64> {{.*}} !nontemporal !0
176; CHECK:         br
177; One nontemporal i64 store per iteration; expected to become a <4 x i64> nontemporal store.
178entry:
179  br label %loop
180
181loop:                                             ; preds = %entry, %loop
182  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
183  %dst.cur = phi ptr [ %ddst, %entry ], [ %dst.next, %loop ]
184  %dst.next = getelementptr inbounds i64, ptr %dst.cur, i64 1
185  store i64 10, ptr %dst.cur, align 4, !nontemporal !8
186  %iv.next = add nuw nsw i32 %iv, 4
187  %more = icmp ult i32 %iv, 4092
188  br i1 %more, label %loop, label %exit
189
190exit:                                             ; preds = %loop
191  ret void
192}
193
194define void @test_double_store(ptr %ddst) {
195; CHECK-LABEL: define void @test_double_store(
196; CHECK-LABEL: vector.body:
197; CHECK:         store <4 x double> {{.*}} !nontemporal !0
198; CHECK:         br
199; One nontemporal double store per iteration; expected to become a <4 x double> nontemporal store.
200entry:
201  br label %loop
202
203loop:                                             ; preds = %entry, %loop
204  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
205  %dst.cur = phi ptr [ %ddst, %entry ], [ %dst.next, %loop ]
206  %dst.next = getelementptr inbounds double, ptr %dst.cur, i64 1
207  store double 10.0, ptr %dst.cur, align 4, !nontemporal !8
208  %iv.next = add nuw nsw i32 %iv, 4
209  %more = icmp ult i32 %iv, 4092
210  br i1 %more, label %loop, label %exit
211
212exit:                                             ; preds = %loop
213  ret void
214}
215
216define void @test_i128_store(ptr %ddst) {
217; CHECK-LABEL: define void @test_i128_store(
218; CHECK-LABEL: vector.body:
219; CHECK:         store <4 x i128> {{.*}} !nontemporal !0
220; CHECK:         br
221; One nontemporal i128 store per iteration; expected to become a <4 x i128> nontemporal store.
222entry:
223  br label %loop
224
225loop:                                             ; preds = %entry, %loop
226  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
227  %dst.cur = phi ptr [ %ddst, %entry ], [ %dst.next, %loop ]
228  %dst.next = getelementptr inbounds i128, ptr %dst.cur, i64 1
229  store i128 10, ptr %dst.cur, align 4, !nontemporal !8
230  %iv.next = add nuw nsw i32 %iv, 4
231  %more = icmp ult i32 %iv, 4092
232  br i1 %more, label %loop, label %exit
233
234exit:                                             ; preds = %loop
235  ret void
236}
237
238define void @test_i256_store(ptr %ddst) {
239; CHECK-LABEL: define void @test_i256_store(
240; CHECK-NOT:   vector.body:
241; CHECK:        ret void
242; One nontemporal i256 store per iteration; the directives above expect the loop to stay scalar.
243entry:
244  br label %loop
245
246loop:                                             ; preds = %entry, %loop
247  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
248  %dst.cur = phi ptr [ %ddst, %entry ], [ %dst.next, %loop ]
249  %dst.next = getelementptr inbounds i256, ptr %dst.cur, i64 1
250  store i256 10, ptr %dst.cur, align 4, !nontemporal !8
251  %iv.next = add nuw nsw i32 %iv, 4
252  %more = icmp ult i32 %iv, 4092
253  br i1 %more, label %loop, label %exit
254
255exit:                                             ; preds = %loop
256  ret void
257}
258
259define i4 @test_i4_load(ptr %ddst) {
260; CHECK-LABEL: define i4 @test_i4_load
261; CHECK-NOT: vector.body:
262; CHECK: ret i4 %{{.*}}
263; Sums 4092 nontemporal i4 loads; the directives above expect the loop to stay scalar.
264entry:
265  br label %for.body
266
267for.body:                                         ; preds = %entry, %for.body
268  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
269  %acc.08 = phi i4 [ 0, %entry ], [ %add, %for.body ]
270  %arrayidx = getelementptr inbounds i4, ptr %ddst, i64 %indvars.iv
271  %l = load i4, ptr %arrayidx, align 1, !nontemporal !8
272  %add = add i4 %l, %acc.08
273  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
274  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
275  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
276
277for.cond.cleanup:                                 ; preds = %for.body
278  ret i4 %add
279}
280
281define i8 @test_load_i8(ptr %ddst) {
282; CHECK-LABEL: @test_load_i8(
283; CHECK:   vector.body:
284; CHECK: load <4 x i8>, ptr {{.*}}, align 1, !nontemporal !0
285; CHECK: ret i8 %{{.*}}
286; Sums 4092 nontemporal i8 loads; expected to vectorize with a <4 x i8> nontemporal load.
287entry:
288  br label %for.body
289
290for.body:                                         ; preds = %entry, %for.body
291  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
292  %acc.08 = phi i8 [ 0, %entry ], [ %add, %for.body ]
293  %arrayidx = getelementptr inbounds i8, ptr %ddst, i64 %indvars.iv
294  %l = load i8, ptr %arrayidx, align 1, !nontemporal !8
295  %add = add i8 %l, %acc.08
296  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
297  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
298  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
299
300for.cond.cleanup:                                 ; preds = %for.body
301  ret i8 %add
302}
303
304define half @test_half_load(ptr %ddst) {
305; CHECK-LABEL: @test_half_load
306; CHECK-LABEL:   vector.body:
307; CHECK: load <4 x half>, ptr {{.*}}, align 2, !nontemporal !0
308; CHECK: ret half %{{.*}}
309; Accumulates 4092 nontemporal half loads with fadd; expected to vectorize with a <4 x half> nontemporal load.
310entry:
311  br label %for.body
312
313for.body:                                         ; preds = %entry, %for.body
314  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
315  %acc.08 = phi half [ 0.0, %entry ], [ %add, %for.body ]
316  %arrayidx = getelementptr inbounds half, ptr %ddst, i64 %indvars.iv
317  %l = load half, ptr %arrayidx, align 2, !nontemporal !8
318  %add = fadd half %l, %acc.08
319  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
320  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
321  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
322
323for.cond.cleanup:                                 ; preds = %for.body
324  ret half %add
325}
326
327define i16 @test_i16_load(ptr %ddst) {
328; CHECK-LABEL: @test_i16_load
329; CHECK-LABEL:   vector.body:
330; CHECK: load <4 x i16>, ptr {{.*}}, align 2, !nontemporal !0
331; CHECK: ret i16 %{{.*}}
332; Sums 4092 nontemporal i16 loads; expected to vectorize with a <4 x i16> nontemporal load.
333entry:
334  br label %for.body
335
336for.body:                                         ; preds = %entry, %for.body
337  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
338  %acc.08 = phi i16 [ 0, %entry ], [ %add, %for.body ]
339  %arrayidx = getelementptr inbounds i16, ptr %ddst, i64 %indvars.iv
340  %l = load i16, ptr %arrayidx, align 2, !nontemporal !8
341  %add = add i16 %l, %acc.08
342  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
343  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
344  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
345
346for.cond.cleanup:                                 ; preds = %for.body
347  ret i16 %add
348}
349
350define i32 @test_i32_load(ptr %ddst) {
351; CHECK-LABEL: @test_i32_load
352; CHECK-LABEL:   vector.body:
353; CHECK: load <4 x i32>, ptr {{.*}}, align 4, !nontemporal !0
354; CHECK: ret i32 %{{.*}}
355; Sums 4092 nontemporal i32 loads; expected to vectorize with a <4 x i32> nontemporal load.
356entry:
357  br label %for.body
358
359for.body:                                         ; preds = %entry, %for.body
360  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
361  %acc.08 = phi i32 [ 0, %entry ], [ %add, %for.body ]
362  %arrayidx = getelementptr inbounds i32, ptr %ddst, i64 %indvars.iv
363  %l = load i32, ptr %arrayidx, align 4, !nontemporal !8
364  %add = add i32 %l, %acc.08
365  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
366  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
367  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
368
369for.cond.cleanup:                                 ; preds = %for.body
370  ret i32 %add
371}
372
373define i33 @test_i33_load(ptr %ddst) {
374; CHECK-LABEL: @test_i33_load
375; CHECK-NOT:   vector.body:
376; CHECK: ret i33 %{{.*}}
377; Sums 4092 nontemporal i33 loads; the directives above expect the loop to stay scalar.
378entry:
379  br label %for.body
380
381for.body:                                         ; preds = %entry, %for.body
382  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
383  %acc.08 = phi i33 [ 0, %entry ], [ %add, %for.body ]
384  %arrayidx = getelementptr inbounds i33, ptr %ddst, i64 %indvars.iv
385  %l = load i33, ptr %arrayidx, align 4, !nontemporal !8
386  %add = add i33 %l, %acc.08
387  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
388  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
389  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
390
391for.cond.cleanup:                                 ; preds = %for.body
392  ret i33 %add
393}
394
395define i40 @test_i40_load(ptr %ddst) {
396; CHECK-LABEL: @test_i40_load
397; CHECK-NOT:   vector.body:
398; CHECK: ret i40 %{{.*}}
399; Sums 4092 nontemporal i40 loads; the directives above expect the loop to stay scalar.
400entry:
401  br label %for.body
402
403for.body:                                         ; preds = %entry, %for.body
404  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
405  %acc.08 = phi i40 [ 0, %entry ], [ %add, %for.body ]
406  %arrayidx = getelementptr inbounds i40, ptr %ddst, i64 %indvars.iv
407  %l = load i40, ptr %arrayidx, align 4, !nontemporal !8
408  %add = add i40 %l, %acc.08
409  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
410  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
411  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
412
413for.cond.cleanup:                                 ; preds = %for.body
414  ret i40 %add
415}
416
417define i64 @test_i64_load(ptr %ddst) {
418; CHECK-LABEL: @test_i64_load
419; CHECK-LABEL:   vector.body:
420; CHECK: load <4 x i64>, ptr {{.*}}, align 4, !nontemporal !0
421; CHECK: ret i64 %{{.*}}
422; Sums 4092 nontemporal i64 loads; expected to vectorize with a <4 x i64> nontemporal load.
423entry:
424  br label %for.body
425
426for.body:                                         ; preds = %entry, %for.body
427  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
428  %acc.08 = phi i64 [ 0, %entry ], [ %add, %for.body ]
429  %arrayidx = getelementptr inbounds i64, ptr %ddst, i64 %indvars.iv
430  %l = load i64, ptr %arrayidx, align 4, !nontemporal !8
431  %add = add i64 %l, %acc.08
432  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
433  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
434  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
435
436for.cond.cleanup:                                 ; preds = %for.body
437  ret i64 %add
438}
439
440define double @test_double_load(ptr %ddst) {
441; CHECK-LABEL: @test_double_load
442; CHECK-LABEL:   vector.body:
443; CHECK: load <4 x double>, ptr {{.*}}, align 4, !nontemporal !0
444; CHECK: ret double %{{.*}}
445; Accumulates 4092 nontemporal double loads with fadd; expected to vectorize with a <4 x double> nontemporal load.
446entry:
447  br label %for.body
448
449for.body:                                         ; preds = %entry, %for.body
450  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
451  %acc.08 = phi double [ 0.0, %entry ], [ %add, %for.body ]
452  %arrayidx = getelementptr inbounds double, ptr %ddst, i64 %indvars.iv
453  %l = load double, ptr %arrayidx, align 4, !nontemporal !8
454  %add = fadd double %l, %acc.08
455  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
456  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
457  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
458
459for.cond.cleanup:                                 ; preds = %for.body
460  ret double %add
461}
462
463define i128 @test_i128_load(ptr %ddst) {
464; CHECK-LABEL: @test_i128_load
465; CHECK-LABEL:   vector.body:
466; CHECK: load <4 x i128>, ptr {{.*}}, align 4, !nontemporal !0
467; CHECK: ret i128 %{{.*}}
468; Sums 4092 nontemporal i128 loads; expected to vectorize with a <4 x i128> nontemporal load.
469entry:
470  br label %for.body
471
472for.body:                                         ; preds = %entry, %for.body
473  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
474  %acc.08 = phi i128 [ 0, %entry ], [ %add, %for.body ]
475  %arrayidx = getelementptr inbounds i128, ptr %ddst, i64 %indvars.iv
476  %l = load i128, ptr %arrayidx, align 4, !nontemporal !8
477  %add = add i128 %l, %acc.08
478  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
479  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
480  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
481
482for.cond.cleanup:                                 ; preds = %for.body
483  ret i128 %add
484}
485
486define i256 @test_256_load(ptr %ddst) {
487; CHECK-LABEL: @test_256_load
488; CHECK-NOT:   vector.body:
489; CHECK: ret i256 %{{.*}}
490; Sums 4092 nontemporal i256 loads; the directives above expect the loop to stay scalar.
491entry:
492  br label %for.body
493
494for.body:                                         ; preds = %entry, %for.body
495  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
496  %acc.08 = phi i256 [ 0, %entry ], [ %add, %for.body ]
497  %arrayidx = getelementptr inbounds i256, ptr %ddst, i64 %indvars.iv
498  %l = load i256, ptr %arrayidx, align 4, !nontemporal !8
499  %add = add i256 %l, %acc.08
500  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
501  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
502  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
503
504for.cond.cleanup:                                 ; preds = %for.body
505  ret i256 %add
506}
507
508!8 = !{i32 1} ; node attached as !nontemporal to every load and store in the functions above
509