// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s

// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

void foo();
void bar();

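// The two loops in baz() below use an inscan reduction over the array section
// a[:n] and the scalar b.  As a rough guide to the CHECK lines (a_buffer,
// b_buffer, etc. name FileCheck variables, not real symbols): baz() itself
// stack-allocates per-iteration scratch buffers for both loops
// (float a_buffer[10][n], double b_buffer[10]) around the two
// __kmpc_fork_call invocations; each outlined parallel region emits the loop
// twice -- an input phase that copies every iteration's private copies into
// buffer slot i, and a scan phase that reads the prefix-summed values back
// into the privates for the part of the body that uses the scan result --
// with a ceil(log2(10))-step in-place prefix sum over the buffers emitted
// between the two passes.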
// CHECK: define{{.*}} void @{{.*}}baz{{.*}}(i32 noundef %n)
void baz(int n) {
  static float a[10];
  static double b;

  // CHECK: call ptr @llvm.stacksave.p0()
  // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]

  // float a_buffer[10][n];
  // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],
  // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,

  // CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(

  // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]

  // float a_buffer[10][n];
  // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],

  // double b_buffer[10];
  // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
  // CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(
  // CHECK: call void @llvm.stackrestore.p0(ptr

#pragma omp parallel for simd reduction(inscan, +:a[:n], b)
  for (int i = 0; i < 10; ++i) {
    // CHECK: call void @__kmpc_for_static_init_4(
    // CHECK: call ptr @llvm.stacksave.p0()
    // CHECK: store float 0.000000e+00, ptr %
    // CHECK: store double 0.000000e+00, ptr [[B_PRIV_ADDR:%.+]],
    // CHECK: br label %[[DISPATCH:[^,]+]]
    // CHECK: [[INPUT_PHASE:.+]]:
    // CHECK: call void @{{.+}}foo{{.+}}()

    // a_buffer[i][0..n] = a_priv[0..n];
    // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
    // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF:%.+]], i64 [[IDX]]
    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
    // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
    // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false)

    // b_buffer[i] = b_priv;
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]]
    // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]],
    // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]],
    // CHECK: br label %[[LOOP_CONTINUE:.+]]

    // CHECK: [[DISPATCH]]:
    // CHECK: br label %[[INPUT_PHASE]]
    // CHECK: [[LOOP_CONTINUE]]:
    // CHECK: call void @llvm.stackrestore.p0(ptr %
    // CHECK: call void @__kmpc_for_static_fini(
    // CHECK: call void @__kmpc_barrier(
    foo();
#pragma omp scan inclusive(a[:n], b)
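    // The next block of checks covers the in-place prefix sum performed over
    // the scratch buffers between the two passes.  Roughly, in C-style
    // pseudocode (loop bounds follow the constants matched below; the names
    // are illustrative, not real symbols):
    //
    //   for (unsigned k = 0, k2pow = 1; k != (unsigned)ceil(log2(10.0));
    //        ++k, k2pow <<= 1)
    //     for (size_t i = 9; i >= k2pow; --i) {
    //       for (size_t j = 0; j < n; ++j)
    //         a_buffer[i][j] += a_buffer[i - k2pow][j];
    //       b_buffer[i] += b_buffer[i - k2pow];
    //     }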
    // CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
    // CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
    // CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32
    // CHECK: br label %[[OUTER_BODY:[^,]+]]
    // CHECK: [[OUTER_BODY]]:
    // CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ]
    // CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ]
    // CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]]
    // CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]]
    // CHECK: [[INNER_BODY]]:
    // CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ]

    // a_buffer[i] += a_buffer[i-pow(2, k)];
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
    // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]]
    // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
    // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
    // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]]
    // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
    // CHECK: [[RED_BODY]]:
    // CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi ptr [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ]
    // CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi ptr [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ]
    // CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, ptr [[A_BUF_IDX_ELEM]],
    // CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, ptr [[A_BUF_IDX_SUB_K2POW_ELEM]],
    // CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]]
    // CHECK: store float [[RED]], ptr [[A_BUF_IDX_ELEM]],
    // CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, ptr [[A_BUF_IDX_ELEM]], i32 1
    // CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, ptr [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1
    // CHECK: [[DONE:%.+]] = icmp eq ptr [[A_BUF_IDX_NEXT]], [[A_BUF_END]]
    // CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]]
    // CHECK: [[RED_DONE]]:

    // b_buffer[i] += b_buffer[i-pow(2, k)];
    // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
    // CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, ptr [[B_BUF_IDX_SUB_K2POW]],
    // CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]]
    // CHECK: store double [[RED]], ptr [[B_BUF_IDX]],

    // --i;
    // CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1
    // CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]]
    // CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]]
    // CHECK: [[INNER_EXIT]]:

    // ++k;
    // CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1
    // k2pow <<= 1;
    // CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1
    // CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]]
    // CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]]
    // CHECK: [[OUTER_EXIT]]:
    bar();
    // CHECK: call void @__kmpc_for_static_init_4(
    // CHECK: call ptr @llvm.stacksave.p0()
    // CHECK: store float 0.000000e+00, ptr %
    // CHECK: store double 0.000000e+00, ptr [[B_PRIV_ADDR:%.+]],
    // CHECK: br label %[[DISPATCH:[^,]+]]

    // Skip over the code emitted for the body before the scan directive.
    // CHECK: call void @{{.+}}foo{{.+}}()

    // CHECK: [[EXIT_INSCAN:[^,]+]]:
    // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]

    // CHECK: [[DISPATCH]]:
    // a_priv[0..n] = a_buffer[i][0..n];
    // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
    // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
    // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
    // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false)

    // b_priv = b_buffer[i];
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]]
    // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
    // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]],
    // CHECK: br label %[[SCAN_PHASE:[^,]+]]

    // CHECK: [[SCAN_PHASE]]:
    // CHECK: call void @{{.+}}bar{{.+}}()
    // CHECK: br label %[[EXIT_INSCAN]]

    // CHECK: [[LOOP_CONTINUE]]:
    // CHECK: call void @llvm.stackrestore.p0(ptr %
    // CHECK: call void @__kmpc_for_static_fini(
  }

#pragma omp parallel for simd reduction(inscan, +:a[:n], b)
  for (int i = 0; i < 10; ++i) {
    // CHECK: call void @__kmpc_for_static_init_4(
    // CHECK: call ptr @llvm.stacksave.p0()
    // CHECK: store float 0.000000e+00, ptr %
    // CHECK: store double 0.000000e+00, ptr [[B_PRIV_ADDR:%.+]],
    // CHECK: br label %[[DISPATCH:[^,]+]]

    // Skip over the code emitted for the body before the scan directive.
    // CHECK: call void @{{.+}}foo{{.+}}()

    // CHECK: [[EXIT_INSCAN:[^,]+]]:

    // a_buffer[i][0..n] = a_priv[0..n];
    // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
    // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF:%.+]], i64 [[IDX]]
    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
    // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
    // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false)

    // b_buffer[i] = b_priv;
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]]
    // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]],
    // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]],
    // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]

    // CHECK: [[DISPATCH]]:
    // CHECK: br label %[[INPUT_PHASE:[^,]+]]

    // CHECK: [[INPUT_PHASE]]:
    // CHECK: call void @{{.+}}bar{{.+}}()
    // CHECK: br label %[[EXIT_INSCAN]]

    // CHECK: [[LOOP_CONTINUE]]:
    // CHECK: call void @llvm.stackrestore.p0(ptr %
    // CHECK: call void @__kmpc_for_static_fini(
    // CHECK: call void @__kmpc_barrier(
    foo();
#pragma omp scan exclusive(a[:n], b)
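    // The prefix-sum tree checked below is the same as in the inclusive case.
    // The difference shows up in the second (scan-phase) pass further down:
    // the privates are re-initialized to the reduction identity and, only for
    // i > 0, read the previous slot, roughly (illustrative pseudocode):
    //
    //   if (i > 0) {
    //     a_priv[0..n) = a_buffer[i - 1][0..n);
    //     b_priv = b_buffer[i - 1];
    //   }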
    // CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
    // CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
    // CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32
    // CHECK: br label %[[OUTER_BODY:[^,]+]]
    // CHECK: [[OUTER_BODY]]:
    // CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ]
    // CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ]
    // CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]]
    // CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]]
    // CHECK: [[INNER_BODY]]:
    // CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ]

    // a_buffer[i] += a_buffer[i-pow(2, k)];
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
    // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]]
    // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
    // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
    // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]]
    // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
    // CHECK: [[RED_BODY]]:
    // CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi ptr [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ]
    // CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi ptr [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ]
    // CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, ptr [[A_BUF_IDX_ELEM]],
    // CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, ptr [[A_BUF_IDX_SUB_K2POW_ELEM]],
    // CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]]
    // CHECK: store float [[RED]], ptr [[A_BUF_IDX_ELEM]],
    // CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, ptr [[A_BUF_IDX_ELEM]], i32 1
    // CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, ptr [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1
    // CHECK: [[DONE:%.+]] = icmp eq ptr [[A_BUF_IDX_NEXT]], [[A_BUF_END]]
    // CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]]
    // CHECK: [[RED_DONE]]:

    // b_buffer[i] += b_buffer[i-pow(2, k)];
    // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
    // CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, ptr [[B_BUF_IDX_SUB_K2POW]],
    // CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]]
    // CHECK: store double [[RED]], ptr [[B_BUF_IDX]],

    // --i;
    // CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1
    // CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]]
    // CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]]
    // CHECK: [[INNER_EXIT]]:

    // ++k;
    // CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1
    // k2pow <<= 1;
    // CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1
    // CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]]
    // CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]]
    // CHECK: [[OUTER_EXIT]]:
    bar();
    // CHECK: call void @__kmpc_for_static_init_4(
    // CHECK: call ptr @llvm.stacksave.p0()
    // CHECK: store float 0.000000e+00, ptr %
    // CHECK: store double 0.000000e+00, ptr [[B_PRIV_ADDR:%.+]],
    // CHECK: br label %[[DISPATCH:[^,]+]]

    // CHECK: [[SCAN_PHASE:.+]]:
    // CHECK: call void @{{.+}}foo{{.+}}()
    // CHECK: br label %[[LOOP_CONTINUE:.+]]

    // CHECK: [[DISPATCH]]:
    // if (i > 0)
    //   a_priv[0..n] = a_buffer[i-1][0..n];
    // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
    // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
    // CHECK: [[CMP:%.+]] = icmp eq i64 [[BASE_IDX]], 0
    // CHECK: br i1 [[CMP]], label %[[IF_DONE:[^,]+]], label %[[IF_THEN:[^,]+]]
    // CHECK: [[IF_THEN]]:
    // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]]
    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
    // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
    // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false)

    // b_priv = b_buffer[i-1];
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
    // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
    // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]],
    // CHECK: br label %[[SCAN_PHASE]]

    // CHECK: [[LOOP_CONTINUE]]:
    // CHECK: call void @llvm.stackrestore.p0(ptr %
    // CHECK: call void @__kmpc_for_static_fini(
  }
}

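// The 'simd' part of the combined construct is still expected to tag the
// loops with vectorization metadata, which the line below checks for.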
// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}

#endif