xref: /llvm-project/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll (revision 7d7577256b76e4293f455b8093504d5f7044ab4b)
1; RUN: opt -mcpu=skx -S -passes=loop-vectorize,instcombine -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s
2
; Module target description: the data layout string and the x86-64 Linux triple
; that the test IR below is written for.
3target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4target triple = "x86_64-pc-linux-gnu"
5
6; Case1: With pragma predicate to force tail-folding.
7; All memory operations are masked.
8;void fold_tail(int * restrict p, int * restrict q1, int * restrict q2, int guard) {
9;   #pragma clang loop vectorize_predicate(enable)
10;   for(int ix=0; ix < 1021; ++ix) {
11;     if (ix > guard) {
12;       p[ix] = q1[ix] + q2[ix];
13;     }
14;   }
15;}
16
17;CHECK-LABEL: @fold_tail
18;CHECK: vector.body:
19;CHECK: call <8 x i32> @llvm.masked.load
20;CHECK: call <8 x i32> @llvm.masked.load
21;CHECK: call void @llvm.masked.store
22
23; Function Attrs: nofree norecurse nounwind uwtable
; IR for Case1's fold_tail(p, q1, q2, guard): a counted loop (ix = 0 .. 1020)
; whose guarded body stores q1[ix] + q2[ix] into p[ix]. The loop latch carries
; loop metadata !8 (llvm.loop.vectorize.predicate.enable = true); the memory
; accesses carry only !tbaa, no parallel-access annotation.
24define dso_local void @fold_tail(ptr noalias nocapture %p, ptr noalias nocapture readonly %q1, ptr noalias nocapture readonly %q2,
25i32 %guard) local_unnamed_addr #0 {
26entry:
27  %0 = sext i32 %guard to i64 ; sign-extend guard once so it matches the i64 induction variable
28  br label %for.body
29
30for.cond.cleanup:
31  ret void
32
33for.body:
34  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] ; ix: 0 on entry, ix + 1 from the latch
35  %cmp1 = icmp sgt i64 %indvars.iv, %0 ; ix > guard (signed)
36  br i1 %cmp1, label %if.then, label %for.inc ; skip the loads/store when the guard fails
37
38if.then:
39  %arrayidx = getelementptr inbounds i32, ptr %q1, i64 %indvars.iv ; &q1[ix]
40  %1 = load i32, ptr %arrayidx, align 4, !tbaa !2
41  %arrayidx3 = getelementptr inbounds i32, ptr %q2, i64 %indvars.iv ; &q2[ix]
42  %2 = load i32, ptr %arrayidx3, align 4, !tbaa !2
43  %add = add nsw i32 %2, %1 ; q2[ix] + q1[ix]
44  %arrayidx5 = getelementptr inbounds i32, ptr %p, i64 %indvars.iv ; &p[ix]
45  store i32 %add, ptr %arrayidx5, align 4, !tbaa !2
46  br label %for.inc
47
48for.inc:
49  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
50  %exitcond = icmp eq i64 %indvars.iv.next, 1021 ; 1021 iterations; not a multiple of the forced vector width 8
51  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !8 ; !8 enables predicated vectorization for this loop
52}
53
54; Case2: With pragma assume_safety, both load and store are masked.
55; void assume_safety(int * p, int * q1, int * q2, int guard) {
56;   #pragma clang loop vectorize(assume_safety)
57;   for(int ix=0; ix < 1021; ++ix) {
58;     if (ix > guard) {
59;       p[ix] = q1[ix] + q2[ix];
60;     }
61;   }
62;}
63
64;CHECK-LABEL: @assume_safety
65;CHECK: vector.body:
66;CHECK:  call <8 x i32> @llvm.masked.load
67;CHECK:  call void @llvm.masked.store
68
69; Function Attrs: norecurse nounwind uwtable
; IR for Case2's assume_safety(p, q1, q2, guard), written with unnamed values:
; the four parameters are %0 (p), %1 (q1), %2 (q2) and %3 (guard), and the
; entry block is the implicit label %4 referenced by the phi below.
; Every load/store is tagged !llvm.mem.parallel_loop_access !6, where !6 is
; also the loop ID attached to the latch branch.
; NOTE(review): this per-access spelling differs from Case3, which uses
; !llvm.access.group with llvm.loop.parallel_accesses; presumably it is the
; older form of the same annotation - confirm against the LLVM LangRef.
70define void @assume_safety(ptr nocapture, ptr nocapture readonly, ptr nocapture readonly, i32) local_unnamed_addr #0 {
71  %5 = sext i32 %3 to i64 ; sign-extend guard to i64 for the comparison with the induction variable
72  br label %7
73
74; <label>:6:
75  ret void
76
77; <label>:7:
78  %8 = phi i64 [ 0, %4 ], [ %18, %17 ] ; ix: 0 from the entry block (%4), ix + 1 from the latch (%17)
79  %9 = icmp sgt i64 %8, %5 ; ix > guard (signed)
80  br i1 %9, label %10, label %17 ; skip the loads/store when the guard fails
81
82; <label>:10:
83  %11 = getelementptr inbounds i32, ptr %1, i64 %8 ; &q1[ix]
84  %12 = load i32, ptr %11, align 4, !tbaa !2, !llvm.mem.parallel_loop_access !6
85  %13 = getelementptr inbounds i32, ptr %2, i64 %8 ; &q2[ix]
86  %14 = load i32, ptr %13, align 4, !tbaa !2, !llvm.mem.parallel_loop_access !6
87  %15 = add nsw i32 %14, %12 ; q2[ix] + q1[ix]
88  %16 = getelementptr inbounds i32, ptr %0, i64 %8 ; &p[ix]
89  store i32 %15, ptr %16, align 4, !tbaa !2, !llvm.mem.parallel_loop_access !6
90  br label %17
91
92; <label>:17:
93  %18 = add nuw nsw i64 %8, 1
94  %19 = icmp eq i64 %18, 1021 ; 1021 iterations; not a multiple of the forced vector width 8
95  br i1 %19, label %6, label %7, !llvm.loop !6 ; !6 = loop ID carrying llvm.loop.vectorize.enable
96}
97
98; Case3: With pragma assume_safety and pragma predicate both the store and the
99; load are masked.
100; void fold_tail_and_assume_safety(int * p, int * q1, int * q2, int guard) {
101;   #pragma clang loop vectorize(assume_safety) vectorize_predicate(enable)
102;   for(int ix=0; ix < 1021; ++ix) {
103;     if (ix > guard) {
104;       p[ix] = q1[ix] + q2[ix];
105;     }
106;   }
107;}
108
109;CHECK-LABEL: @fold_tail_and_assume_safety
110;CHECK: vector.body:
111;CHECK: call <8 x i32> @llvm.masked.load
112;CHECK: call <8 x i32> @llvm.masked.load
113;CHECK: call void @llvm.masked.store
114
115; Function Attrs: nofree norecurse nounwind uwtable
; IR for Case3's fold_tail_and_assume_safety(p, q1, q2, guard): the same
; guarded loop body as @fold_tail, but every load/store is placed in access
; group !10 and the latch carries loop ID !11, which lists both
; llvm.loop.parallel_accesses (for !10) and
; llvm.loop.vectorize.predicate.enable = true.
116define dso_local void @fold_tail_and_assume_safety(ptr noalias nocapture %p, ptr noalias nocapture readonly %q1, ptr noalias nocapture readonly %q2,
117i32 %guard) local_unnamed_addr #0 {
118entry:
119  %0 = sext i32 %guard to i64 ; sign-extend guard once so it matches the i64 induction variable
120  br label %for.body
121
122for.cond.cleanup:
123  ret void
124
125for.body:
126  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] ; ix: 0 on entry, ix + 1 from the latch
127  %cmp1 = icmp sgt i64 %indvars.iv, %0 ; ix > guard (signed)
128  br i1 %cmp1, label %if.then, label %for.inc ; skip the loads/store when the guard fails
129
130if.then:
131  %arrayidx = getelementptr inbounds i32, ptr %q1, i64 %indvars.iv ; &q1[ix]
132  %1 = load i32, ptr %arrayidx, align 4, !tbaa !2, !llvm.access.group !10
133  %arrayidx3 = getelementptr inbounds i32, ptr %q2, i64 %indvars.iv ; &q2[ix]
134  %2 = load i32, ptr %arrayidx3, align 4, !tbaa !2, !llvm.access.group !10
135  %add = add nsw i32 %2, %1 ; q2[ix] + q1[ix]
136  %arrayidx5 = getelementptr inbounds i32, ptr %p, i64 %indvars.iv ; &p[ix]
137  store i32 %add, ptr %arrayidx5, align 4, !tbaa !2, !llvm.access.group !10
138  br label %for.inc
139
140for.inc:
141  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
142  %exitcond = icmp eq i64 %indvars.iv.next, 1021 ; 1021 iterations; not a multiple of the forced vector width 8
143  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !11 ; !11 = parallel accesses (!10) + predicate enable
144}
145
; Attribute group #0, referenced by all three test functions above
; (@fold_tail, @assume_safety, @fold_tail_and_assume_safety).
146attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
147
; Named module-level metadata: module flags (!0) and producer identification (!1).
148!llvm.module.flags = !{!0}
149!llvm.ident = !{!1}
150
151!0 = !{i32 1, !"wchar_size", i32 4}
152!1 = !{!"clang version 6.0.0-1ubuntu2 (tags/RELEASE_600/final)"}
; TBAA nodes: !2 is the tag attached via !tbaa to every load/store above;
; it refers to the "int" type node !3, which chains through !4 to the root !5.
153!2 = !{!3, !3, i64 0}
154!3 = !{!"int", !4, i64 0}
155!4 = !{!"omnipotent char", !5, i64 0}
156!5 = !{!"Simple C/C++ TBAA"}
; Case2 (@assume_safety): !6 is the loop ID on the latch and is also the operand
; of !llvm.mem.parallel_loop_access on that loop's memory accesses.
157!6 = distinct !{!6, !7}
158!7 = !{!"llvm.loop.vectorize.enable", i1 true}
159
; Case1 (@fold_tail): loop ID !8 carries only the predicate-enable hint.
160!8 = distinct !{!8, !9}
161!9 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
162
; Case3 (@fold_tail_and_assume_safety): !10 is the access group attached to the
; loop's loads/store; loop ID !11 lists it under llvm.loop.parallel_accesses (!12)
; together with the predicate-enable hint (!13).
163!10 = distinct !{}
164!11 = distinct !{!11, !12, !13}
165!12 = !{!"llvm.loop.parallel_accesses", !10}
166!13 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
167