; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

; Dependence distance between read and write is greater than the trip
; count of the loop.  Thus, values written are never read for any
; valid vectorization of the loop.
define void @test(ptr %p) {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 32
; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP6]], 200
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
; CHECK-NEXT:    store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP11]], align 32
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; CHECK-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 200
; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; CHECK-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 200
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}
74
; Dependence distance is less than trip count, thus we must prove that
; chosen VF guaranteed to be less than dependence distance.
define void @test_may_clobber(ptr %p) {
; CHECK-LABEL: @test_may_clobber(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP0]], 100
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
; CHECK-NEXT:    store <4 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; CHECK-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 100
; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; CHECK-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 100
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}
131
; Trivially no overlap due to maximum possible value of VLEN and LMUL
define void @trivial_due_max_vscale(ptr %p) {
; CHECK-LABEL: @trivial_due_max_vscale(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 32
; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP6]], 8192
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
; CHECK-NEXT:    store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP11]], align 32
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; CHECK-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 8192
; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; CHECK-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 8192
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}
197
; Dependence distance could be violated via LMUL>=2 or interleaving
define void @no_high_lmul_or_interleave(ptr %p) {
; CHECK-LABEL: @no_high_lmul_or_interleave(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 32
; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP6]], 1024
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
; CHECK-NEXT:    store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP11]], align 32
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[A1]], align 32
; CHECK-NEXT:    [[OFFSET:%.*]] = add i64 [[IV]], 1024
; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
; CHECK-NEXT:    store i64 [[V]], ptr [[A2]], align 32
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[IV]], 199
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 1024
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}