// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2

// expected-no-diagnostics
#ifndef HEADER
#define HEADER

#define N 1000
#define M 10

16 template<typename tx>
ftemplate(int n)17 tx ftemplate(int n) {
18 tx a[N];
19 short aa[N];
20 tx b[10];
21 tx c[M][M];
22 tx f = n;
23 tx l;
24 int k;
25
26 #pragma omp target teams distribute parallel for simd lastprivate(l) dist_schedule(static,128) schedule(static,32)
27 for(int i = 0; i < n; i++) {
28 a[i] = 1;
29 l = i;
30 }
31
32 #pragma omp target teams distribute parallel for simd map(tofrom: aa) num_teams(M) thread_limit(64)
33 for(int i = 0; i < n; i++) {
34 aa[i] += 1;
35 }
36
37 #pragma omp target teams distribute parallel for simd map(tofrom:a, aa, b) if(target: n>40) proc_bind(spread)
38 for(int i = 0; i < 10; i++) {
39 b[i] += 1;
40 }
41
42 #pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k)
43 for(int i = 0; i < M; i++) {
44 for(int j = 0; j < M; j++) {
45 k = M;
46 c[i][j] = i+j*f+k;
47 }
48 }
49
50 return a[0];
51 }
bar(int n)53 int bar(int n){
54 int a = 0;
55
56 a += ftemplate<int>(n);
57
58 return a;
59 }

#endif
62 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26
63 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR0:[0-9]+]] {
64 // CHECK1-NEXT: entry:
65 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
66 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
67 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
68 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
69 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
70 // CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
71 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
72 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
73 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
74 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
75 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
76 // CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
77 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
78 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment, ptr [[DYN_PTR]])
79 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
80 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
81 // CHECK1: user_code.entry:
82 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
83 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
84 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
85 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
86 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
87 // CHECK1-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
88 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
89 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
90 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
91 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
92 // CHECK1-NEXT: call void @__kmpc_target_deinit()
93 // CHECK1-NEXT: ret void
94 // CHECK1: worker.exit:
95 // CHECK1-NEXT: ret void
96 //
97 //
98 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined
99 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] {
100 // CHECK1-NEXT: entry:
101 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
102 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
103 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
104 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
105 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
106 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
107 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
108 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
109 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
110 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
111 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
112 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
113 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
114 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
115 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
116 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
117 // CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
118 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8
119 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
120 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
121 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
122 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
123 // CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
124 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
125 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
126 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
127 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
128 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
129 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
130 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
131 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
132 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4
133 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
134 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
135 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
136 // CHECK1: omp.precond.then:
137 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
138 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
139 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
140 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
141 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
142 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
143 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
144 // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
145 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
146 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
147 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
148 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
149 // CHECK1: cond.true:
150 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
151 // CHECK1-NEXT: br label [[COND_END:%.*]]
152 // CHECK1: cond.false:
153 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
154 // CHECK1-NEXT: br label [[COND_END]]
155 // CHECK1: cond.end:
156 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
157 // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
158 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
159 // CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
160 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
161 // CHECK1: omp.inner.for.cond:
162 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
163 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP18]]
164 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
165 // CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
166 // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
167 // CHECK1: omp.inner.for.body:
168 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
169 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
170 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
171 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
172 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
173 // CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]]
174 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP18]]
175 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
176 // CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]]
177 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED]], align 8, !llvm.access.group [[ACC_GRP18]]
178 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
179 // CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr
180 // CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP18]]
181 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
182 // CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr
183 // CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP18]]
184 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
185 // CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr
186 // CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP18]]
187 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
188 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP18]]
189 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
190 // CHECK1-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr
191 // CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP18]]
192 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP18]]
193 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP18]]
194 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5), !llvm.access.group [[ACC_GRP18]]
195 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
196 // CHECK1: omp.inner.for.inc:
197 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
198 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]]
199 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
200 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
201 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
202 // CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]]
203 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
204 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
205 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
206 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]]
207 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
208 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
209 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
210 // CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP18]]
211 // CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
212 // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
213 // CHECK1: cond.true10:
214 // CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP18]]
215 // CHECK1-NEXT: br label [[COND_END12:%.*]]
216 // CHECK1: cond.false11:
217 // CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
218 // CHECK1-NEXT: br label [[COND_END12]]
219 // CHECK1: cond.end12:
220 // CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ]
221 // CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
222 // CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
223 // CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
224 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
225 // CHECK1: omp.inner.for.end:
226 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
227 // CHECK1: omp.loop.exit:
228 // CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
229 // CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4
230 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]])
231 // CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
232 // CHECK1-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
233 // CHECK1-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
234 // CHECK1: .omp.final.then:
235 // CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
236 // CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP48]], 0
237 // CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
238 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1
239 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]]
240 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[I3]], align 4
241 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
242 // CHECK1: .omp.final.done:
243 // CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
244 // CHECK1-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
245 // CHECK1-NEXT: br i1 [[TMP50]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
246 // CHECK1: .omp.lastprivate.then:
247 // CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[L_ADDR]], align 4
248 // CHECK1-NEXT: store i32 [[TMP51]], ptr [[L_ADDR]], align 4
249 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
250 // CHECK1: .omp.lastprivate.done:
251 // CHECK1-NEXT: br label [[OMP_PRECOND_END]]
252 // CHECK1: omp.precond.end:
253 // CHECK1-NEXT: ret void
254 //
255 //
256 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_omp_outlined
257 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] {
258 // CHECK1-NEXT: entry:
259 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
260 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
261 // CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
262 // CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
263 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
264 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
265 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
266 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
267 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
268 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
269 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
270 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
271 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
272 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
273 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
274 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
275 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
276 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
277 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
278 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
279 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
280 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
281 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
282 // CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
283 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
284 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
285 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
286 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
287 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
288 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
289 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
290 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
291 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4
292 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
293 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
294 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
295 // CHECK1: omp.precond.then:
296 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
297 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
298 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
299 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
300 // CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
301 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
302 // CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
303 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
304 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
305 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
306 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
307 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
308 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
309 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32)
310 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
311 // CHECK1: omp.dispatch.cond:
312 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
313 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
314 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP10]] to i32
315 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[CONV5]]
316 // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
317 // CHECK1: cond.true:
318 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
319 // CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP11]] to i32
320 // CHECK1-NEXT: br label [[COND_END:%.*]]
321 // CHECK1: cond.false:
322 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
323 // CHECK1-NEXT: br label [[COND_END]]
324 // CHECK1: cond.end:
325 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
326 // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
327 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
328 // CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
329 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
330 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
331 // CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
332 // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
333 // CHECK1: omp.dispatch.body:
334 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
335 // CHECK1: omp.inner.for.cond:
336 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
337 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
338 // CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
339 // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
340 // CHECK1: omp.inner.for.body:
341 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
342 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
343 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
344 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]]
345 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]]
346 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64
347 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
348 // CHECK1-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]]
349 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]]
350 // CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]]
351 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
352 // CHECK1: omp.body.continue:
353 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
354 // CHECK1: omp.inner.for.inc:
355 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
356 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1
357 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
358 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
359 // CHECK1: omp.inner.for.end:
360 // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
361 // CHECK1: omp.dispatch.inc:
362 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
363 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
364 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
365 // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4
366 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
367 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
368 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
369 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4
370 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
371 // CHECK1: omp.dispatch.end:
372 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
373 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
374 // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP27]])
375 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
376 // CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
377 // CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
378 // CHECK1: .omp.final.then:
379 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
380 // CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP30]], 0
381 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1
382 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1
383 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]]
384 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[I4]], align 4
385 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
386 // CHECK1: .omp.final.done:
387 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
388 // CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
389 // CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
390 // CHECK1: .omp.lastprivate.then:
391 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[L_ADDR]], align 4
392 // CHECK1-NEXT: store i32 [[TMP33]], ptr [[L_ADDR]], align 4
393 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
394 // CHECK1: .omp.lastprivate.done:
395 // CHECK1-NEXT: br label [[OMP_PRECOND_END]]
396 // CHECK1: omp.precond.end:
397 // CHECK1-NEXT: ret void
398 //
399 //
400 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32
401 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] {
402 // CHECK1-NEXT: entry:
403 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
404 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
405 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
406 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
407 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
408 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
409 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
410 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
411 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
412 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
413 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_kernel_environment, ptr [[DYN_PTR]])
414 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
415 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
416 // CHECK1: user_code.entry:
417 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
418 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
419 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
420 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
421 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
422 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
423 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
424 // CHECK1-NEXT: call void @__kmpc_target_deinit()
425 // CHECK1-NEXT: ret void
426 // CHECK1: worker.exit:
427 // CHECK1-NEXT: ret void
428 //
429 //
430 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined
431 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
432 // CHECK1-NEXT: entry:
433 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
434 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
435 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
436 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
437 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
438 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
439 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
440 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
441 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
442 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
443 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
444 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
445 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
446 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
447 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
448 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
449 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
450 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
451 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
452 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
453 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
454 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
455 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
456 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
457 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
458 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
459 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
460 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
461 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4
462 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
463 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
464 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
465 // CHECK1: omp.precond.then:
466 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
467 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
468 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
469 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
470 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
471 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
472 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
473 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
474 // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
475 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
476 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
477 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
478 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
479 // CHECK1: cond.true:
480 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
481 // CHECK1-NEXT: br label [[COND_END:%.*]]
482 // CHECK1: cond.false:
483 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
484 // CHECK1-NEXT: br label [[COND_END]]
485 // CHECK1: cond.end:
486 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
487 // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
488 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
489 // CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
490 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
491 // CHECK1: omp.inner.for.cond:
492 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
493 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]]
494 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
495 // CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
496 // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
497 // CHECK1: omp.inner.for.body:
498 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
499 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
500 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
501 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
502 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]]
503 // CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP25]]
504 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP25]]
505 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
506 // CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr
507 // CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP25]]
508 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
509 // CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr
510 // CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP25]]
511 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
512 // CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr
513 // CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP25]]
514 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
515 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP25]]
516 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP25]]
517 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP25]]
518 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP25]]
519 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
520 // CHECK1: omp.inner.for.inc:
521 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
522 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]]
523 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
524 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
525 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
526 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]]
527 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
528 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
529 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
530 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]]
531 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
532 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
533 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
534 // CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]]
535 // CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
536 // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
537 // CHECK1: cond.true10:
538 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]]
539 // CHECK1-NEXT: br label [[COND_END12:%.*]]
540 // CHECK1: cond.false11:
541 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
542 // CHECK1-NEXT: br label [[COND_END12]]
543 // CHECK1: cond.end12:
544 // CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
545 // CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
546 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
547 // CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
548 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
549 // CHECK1: omp.inner.for.end:
550 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
551 // CHECK1: omp.loop.exit:
552 // CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
553 // CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
554 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
555 // CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
556 // CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
557 // CHECK1-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
558 // CHECK1: .omp.final.then:
559 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
560 // CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0
561 // CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
562 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1
563 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]]
564 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[I3]], align 4
565 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
566 // CHECK1: .omp.final.done:
567 // CHECK1-NEXT: br label [[OMP_PRECOND_END]]
568 // CHECK1: omp.precond.end:
569 // CHECK1-NEXT: ret void
570 //
571 //
572 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined_omp_outlined
573 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
574 // CHECK1-NEXT: entry:
575 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
576 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
577 // CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
578 // CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
579 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
580 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
581 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
582 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
583 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
584 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
585 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
586 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
587 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
588 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
589 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
590 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
591 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
592 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
593 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
594 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
595 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
596 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
597 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
598 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
599 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
600 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
601 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
602 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
603 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
604 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
605 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4
606 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
607 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
608 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
609 // CHECK1: omp.precond.then:
610 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
611 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
612 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
613 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
614 // CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
615 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
616 // CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
617 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
618 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
619 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
620 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
621 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
622 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
623 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
624 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
625 // CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
626 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
627 // CHECK1: omp.inner.for.cond:
628 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
629 // CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
630 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP28]]
631 // CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
632 // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
633 // CHECK1: omp.inner.for.body:
634 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
635 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
636 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
637 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]]
638 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]]
639 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
640 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
641 // CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
642 // CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32
643 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1
644 // CHECK1-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16
645 // CHECK1-NEXT: store i16 [[CONV9]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
646 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
647 // CHECK1: omp.body.continue:
648 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
649 // CHECK1: omp.inner.for.inc:
650 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
651 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]]
652 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
653 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
654 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
655 // CHECK1: omp.inner.for.end:
656 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
657 // CHECK1: omp.loop.exit:
658 // CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
659 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
660 // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
661 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
662 // CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
663 // CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
664 // CHECK1: .omp.final.then:
665 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
666 // CHECK1-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP21]], 0
667 // CHECK1-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
668 // CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1
669 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]]
670 // CHECK1-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4
671 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
672 // CHECK1: .omp.final.done:
673 // CHECK1-NEXT: br label [[OMP_PRECOND_END]]
674 // CHECK1: omp.precond.end:
675 // CHECK1-NEXT: ret void
676 //
677 //
678 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37
679 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
680 // CHECK1-NEXT: entry:
681 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
682 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
683 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
684 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
685 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
686 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
687 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
688 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment, ptr [[DYN_PTR]])
689 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
690 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
691 // CHECK1: user_code.entry:
692 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
693 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
694 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
695 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
696 // CHECK1-NEXT: call void @__kmpc_target_deinit()
697 // CHECK1-NEXT: ret void
698 // CHECK1: worker.exit:
699 // CHECK1-NEXT: ret void
700 //
701 //
702 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined
703 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
704 // CHECK1-NEXT: entry:
705 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
706 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
707 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
708 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
709 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
710 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
711 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
712 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
713 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
714 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
715 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
716 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
717 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
718 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
719 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
720 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
721 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
722 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
723 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
724 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
725 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
726 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
727 // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
728 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
729 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
730 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
731 // CHECK1: cond.true:
732 // CHECK1-NEXT: br label [[COND_END:%.*]]
733 // CHECK1: cond.false:
734 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
735 // CHECK1-NEXT: br label [[COND_END]]
736 // CHECK1: cond.end:
737 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
738 // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
739 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
740 // CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
741 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
742 // CHECK1: omp.inner.for.cond:
743 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]]
744 // CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
745 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
746 // CHECK1: omp.inner.for.body:
747 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
748 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
749 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
750 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
751 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
752 // CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr
753 // CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP31]]
754 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
755 // CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr
756 // CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP31]]
757 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
758 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP31]]
759 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3), !llvm.access.group [[ACC_GRP31]]
760 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
761 // CHECK1: omp.inner.for.inc:
762 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
763 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]]
764 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
765 // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
766 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
767 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]]
768 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
769 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
770 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
771 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]]
772 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
773 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
774 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
775 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9
776 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
777 // CHECK1: cond.true5:
778 // CHECK1-NEXT: br label [[COND_END7:%.*]]
779 // CHECK1: cond.false6:
780 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
781 // CHECK1-NEXT: br label [[COND_END7]]
782 // CHECK1: cond.end7:
783 // CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ]
784 // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
785 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
786 // CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
787 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
788 // CHECK1: omp.inner.for.end:
789 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
790 // CHECK1: omp.loop.exit:
791 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
792 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
793 // CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
794 // CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
795 // CHECK1: .omp.final.then:
796 // CHECK1-NEXT: store i32 10, ptr [[I]], align 4
797 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
798 // CHECK1: .omp.final.done:
799 // CHECK1-NEXT: ret void
800 //
801 //
802 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined_omp_outlined
803 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
804 // CHECK1-NEXT: entry:
805 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
806 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
807 // CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
808 // CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
809 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
810 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
811 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
812 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
813 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
814 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
815 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
816 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
817 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
818 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
819 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
820 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
821 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
822 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
823 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
824 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
825 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
826 // CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
827 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
828 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
829 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
830 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
831 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
832 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
833 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
834 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
835 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
836 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
837 // CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
838 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
839 // CHECK1: omp.inner.for.cond:
840 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]]
841 // CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64
842 // CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP34]]
843 // CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]]
844 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
845 // CHECK1: omp.inner.for.body:
846 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
847 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
848 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
849 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
850 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
851 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
852 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
853 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
854 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
855 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
856 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
857 // CHECK1: omp.body.continue:
858 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
859 // CHECK1: omp.inner.for.inc:
860 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
861 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]]
862 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
863 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
864 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
865 // CHECK1: omp.inner.for.end:
866 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
867 // CHECK1: omp.loop.exit:
868 // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
869 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
870 // CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
871 // CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
872 // CHECK1: .omp.final.then:
873 // CHECK1-NEXT: store i32 10, ptr [[I]], align 4
874 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
875 // CHECK1: .omp.final.done:
876 // CHECK1-NEXT: ret void
877 //
878 //
879 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
880 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR0]] {
881 // CHECK1-NEXT: entry:
882 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
883 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
884 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
885 // CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
886 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
887 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
888 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
889 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
890 // CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
891 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
892 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]])
893 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
894 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
895 // CHECK1: user_code.entry:
896 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
897 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
898 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
899 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
900 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
901 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
902 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
903 // CHECK1-NEXT: call void @__kmpc_target_deinit()
904 // CHECK1-NEXT: ret void
905 // CHECK1: worker.exit:
906 // CHECK1-NEXT: ret void
907 //
908 //
909 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
910 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
911 // CHECK1-NEXT: entry:
912 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
913 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
914 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
915 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
916 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
917 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
918 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
919 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
920 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
921 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
922 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
923 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
924 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
925 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
926 // CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
927 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
928 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
929 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
930 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
931 // CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
932 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
933 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
934 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
935 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
936 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
937 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
938 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
939 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
940 // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
941 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
942 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
943 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
944 // CHECK1: cond.true:
945 // CHECK1-NEXT: br label [[COND_END:%.*]]
946 // CHECK1: cond.false:
947 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
948 // CHECK1-NEXT: br label [[COND_END]]
949 // CHECK1: cond.end:
950 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
951 // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
952 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
953 // CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
954 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
955 // CHECK1: omp.inner.for.cond:
956 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]]
957 // CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100
958 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
959 // CHECK1: omp.inner.for.body:
960 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
961 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
962 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
963 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
964 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP37]]
965 // CHECK1-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4, !llvm.access.group [[ACC_GRP37]]
966 // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8, !llvm.access.group [[ACC_GRP37]]
967 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
968 // CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr
969 // CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP37]]
970 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
971 // CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr
972 // CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP37]]
973 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
974 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP37]]
975 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
976 // CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr
977 // CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP37]]
978 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP37]]
979 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
980 // CHECK1: omp.inner.for.inc:
981 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]]
982 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]]
983 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
984 // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]]
985 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
986 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]]
987 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
988 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
989 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
990 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]]
991 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
992 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
993 // CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
994 // CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99
995 // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
996 // CHECK1: cond.true6:
997 // CHECK1-NEXT: br label [[COND_END8:%.*]]
998 // CHECK1: cond.false7:
999 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
1000 // CHECK1-NEXT: br label [[COND_END8]]
1001 // CHECK1: cond.end8:
1002 // CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ]
1003 // CHECK1-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
1004 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
1005 // CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]]
1006 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]]
1007 // CHECK1: omp.inner.for.end:
1008 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1009 // CHECK1: omp.loop.exit:
1010 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
1011 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1012 // CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
1013 // CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1014 // CHECK1: .omp.final.then:
1015 // CHECK1-NEXT: store i32 10, ptr [[I]], align 4
1016 // CHECK1-NEXT: store i32 10, ptr [[J]], align 4
1017 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
1018 // CHECK1: .omp.final.done:
1019 // CHECK1-NEXT: ret void
1020 //
1021 //
1022 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined_omp_outlined
1023 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
1024 // CHECK1-NEXT: entry:
1025 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1026 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1027 // CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
1028 // CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
1029 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
1030 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
1031 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1032 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
1033 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
1034 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1035 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1036 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1037 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1038 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
1039 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
1040 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
1041 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
1042 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
1043 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
1044 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
1045 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
1046 // CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
1047 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
1048 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1049 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
1050 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
1051 // CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
1052 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
1053 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32
1054 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
1055 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
1056 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1057 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1058 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1059 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
1060 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
1061 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1062 // CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
1063 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1064 // CHECK1: omp.inner.for.cond:
1065 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]]
1066 // CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64
1067 // CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP40]]
1068 // CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]]
1069 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1070 // CHECK1: omp.inner.for.body:
1071 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
1072 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
1073 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
1074 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1075 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]]
1076 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
1077 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
1078 // CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10
1079 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
1080 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]]
1081 // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
1082 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
1083 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP40]]
1084 // CHECK1-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP40]]
1085 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]]
1086 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP40]]
1087 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]]
1088 // CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
1089 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]]
1090 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP40]]
1091 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]]
1092 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]]
1093 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64
1094 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
1095 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP40]]
1096 // CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64
1097 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
1098 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP40]]
1099 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1100 // CHECK1: omp.body.continue:
1101 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1102 // CHECK1: omp.inner.for.inc:
1103 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
1104 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]]
1105 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
1106 // CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
1107 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]]
1108 // CHECK1: omp.inner.for.end:
1109 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1110 // CHECK1: omp.loop.exit:
1111 // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
1112 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1113 // CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
1114 // CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1115 // CHECK1: .omp.final.then:
1116 // CHECK1-NEXT: store i32 10, ptr [[I]], align 4
1117 // CHECK1-NEXT: store i32 10, ptr [[J]], align 4
1118 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
1119 // CHECK1: .omp.final.done:
1120 // CHECK1-NEXT: ret void
1121 //
1122 //
1123 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26
1124 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR0:[0-9]+]] {
1125 // CHECK2-NEXT: entry:
1126 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
1127 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1128 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
1129 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
1130 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
1131 // CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
1132 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1133 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1134 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
1135 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
1136 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
1137 // CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
1138 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
1139 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment, ptr [[DYN_PTR]])
1140 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1141 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1142 // CHECK2: user_code.entry:
1143 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
1144 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
1145 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
1146 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
1147 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
1148 // CHECK2-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
1149 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
1150 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
1151 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
1152 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
1153 // CHECK2-NEXT: call void @__kmpc_target_deinit()
1154 // CHECK2-NEXT: ret void
1155 // CHECK2: worker.exit:
1156 // CHECK2-NEXT: ret void
1157 //
1158 //
1159 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined
1160 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] {
1161 // CHECK2-NEXT: entry:
1162 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1163 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1164 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1165 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
1166 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
1167 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1168 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1169 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1170 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
1171 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1172 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
1173 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
1174 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1175 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1176 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
1177 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
1178 // CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
1179 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4
1180 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1181 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1182 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
1183 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
1184 // CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
1185 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
1186 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
1187 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
1188 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1189 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
1190 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1191 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
1192 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
1193 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4
1194 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1195 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
1196 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1197 // CHECK2: omp.precond.then:
1198 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
1199 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1200 // CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
1201 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1202 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1203 // CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1204 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
1205 // CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
1206 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1207 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1208 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
1209 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1210 // CHECK2: cond.true:
1211 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1212 // CHECK2-NEXT: br label [[COND_END:%.*]]
1213 // CHECK2: cond.false:
1214 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1215 // CHECK2-NEXT: br label [[COND_END]]
1216 // CHECK2: cond.end:
1217 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
1218 // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
1219 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
1220 // CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
1221 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1222 // CHECK2: omp.inner.for.cond:
1223 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
1224 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP18]]
1225 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
1226 // CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
1227 // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1228 // CHECK2: omp.inner.for.body:
1229 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
1230 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
1231 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
1232 // CHECK2-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]]
1233 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]]
1234 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
1235 // CHECK2-NEXT: store i32 [[TMP18]], ptr [[L_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]]
1236 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[L_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]]
1237 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
1238 // CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to ptr
1239 // CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP18]]
1240 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
1241 // CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to ptr
1242 // CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP18]]
1243 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
1244 // CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to ptr
1245 // CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP18]]
1246 // CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
1247 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP18]]
1248 // CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4
1249 // CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP19]] to ptr
1250 // CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP18]]
1251 // CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
1252 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP18]]
1253 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5), !llvm.access.group [[ACC_GRP18]]
1254 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1255 // CHECK2: omp.inner.for.inc:
1256 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
1257 // CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]]
1258 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
1259 // CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
1260 // CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
1261 // CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]]
1262 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
1263 // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
1264 // CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
1265 // CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]]
1266 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
1267 // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
1268 // CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
1269 // CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP18]]
1270 // CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
1271 // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
1272 // CHECK2: cond.true10:
1273 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP18]]
1274 // CHECK2-NEXT: br label [[COND_END12:%.*]]
1275 // CHECK2: cond.false11:
1276 // CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
1277 // CHECK2-NEXT: br label [[COND_END12]]
1278 // CHECK2: cond.end12:
1279 // CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ]
1280 // CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
1281 // CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
1282 // CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
1283 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1284 // CHECK2: omp.inner.for.end:
1285 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1286 // CHECK2: omp.loop.exit:
1287 // CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1288 // CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
1289 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]])
1290 // CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1291 // CHECK2-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
1292 // CHECK2-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1293 // CHECK2: .omp.final.then:
1294 // CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1295 // CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP46]], 0
1296 // CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
1297 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1
1298 // CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]]
1299 // CHECK2-NEXT: store i32 [[ADD16]], ptr [[I3]], align 4
1300 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
1301 // CHECK2: .omp.final.done:
1302 // CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1303 // CHECK2-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
1304 // CHECK2-NEXT: br i1 [[TMP48]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
1305 // CHECK2: .omp.lastprivate.then:
1306 // CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[L_ADDR]], align 4
1307 // CHECK2-NEXT: store i32 [[TMP49]], ptr [[L_ADDR]], align 4
1308 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
1309 // CHECK2: .omp.lastprivate.done:
1310 // CHECK2-NEXT: br label [[OMP_PRECOND_END]]
1311 // CHECK2: omp.precond.end:
1312 // CHECK2-NEXT: ret void
1313 //
1314 //
1315 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_omp_outlined
1316 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1]] {
1317 // CHECK2-NEXT: entry:
1318 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1319 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1320 // CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
1321 // CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
1322 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1323 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
1324 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
1325 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1326 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1327 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1328 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
1329 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1330 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1331 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1332 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1333 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1334 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
1335 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1336 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1337 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
1338 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
1339 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
1340 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
1341 // CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
1342 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
1343 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
1344 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
1345 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1346 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
1347 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1348 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
1349 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
1350 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4
1351 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1352 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
1353 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1354 // CHECK2: omp.precond.then:
1355 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1356 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1357 // CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
1358 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
1359 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
1360 // CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
1361 // CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
1362 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1363 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1364 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1365 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
1366 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32)
1367 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
1368 // CHECK2: omp.dispatch.cond:
1369 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1370 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
1371 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
1372 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1373 // CHECK2: cond.true:
1374 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
1375 // CHECK2-NEXT: br label [[COND_END:%.*]]
1376 // CHECK2: cond.false:
1377 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1378 // CHECK2-NEXT: br label [[COND_END]]
1379 // CHECK2: cond.end:
1380 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
1381 // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
1382 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1383 // CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
1384 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1385 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1386 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
1387 // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
1388 // CHECK2: omp.dispatch.body:
1389 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1390 // CHECK2: omp.inner.for.cond:
1391 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
1392 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
1393 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
1394 // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1395 // CHECK2: omp.inner.for.body:
1396 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
1397 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
1398 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1399 // CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
1400 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
1401 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]]
1402 // CHECK2-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]]
1403 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
1404 // CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]]
1405 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1406 // CHECK2: omp.body.continue:
1407 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1408 // CHECK2: omp.inner.for.inc:
1409 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
1410 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1
1411 // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
1412 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
1413 // CHECK2: omp.inner.for.end:
1414 // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
1415 // CHECK2: omp.dispatch.inc:
1416 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1417 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1418 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
1419 // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
1420 // CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1421 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1422 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
1423 // CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
1424 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
1425 // CHECK2: omp.dispatch.end:
1426 // CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1427 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
1428 // CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP27]])
1429 // CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1430 // CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
1431 // CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1432 // CHECK2: .omp.final.then:
1433 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1434 // CHECK2-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0
1435 // CHECK2-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
1436 // CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
1437 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
1438 // CHECK2-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
1439 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
1440 // CHECK2: .omp.final.done:
1441 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1442 // CHECK2-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
1443 // CHECK2-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
1444 // CHECK2: .omp.lastprivate.then:
1445 // CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[L_ADDR]], align 4
1446 // CHECK2-NEXT: store i32 [[TMP33]], ptr [[L_ADDR]], align 4
1447 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
1448 // CHECK2: .omp.lastprivate.done:
1449 // CHECK2-NEXT: br label [[OMP_PRECOND_END]]
1450 // CHECK2: omp.precond.end:
1451 // CHECK2-NEXT: ret void
1452 //
1453 //
1454 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32
1455 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] {
1456 // CHECK2-NEXT: entry:
1457 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
1458 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1459 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
1460 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
1461 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1462 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1463 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
1464 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
1465 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
1466 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
1467 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_kernel_environment, ptr [[DYN_PTR]])
1468 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1469 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1470 // CHECK2: user_code.entry:
1471 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
1472 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
1473 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
1474 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
1475 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
1476 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
1477 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
1478 // CHECK2-NEXT: call void @__kmpc_target_deinit()
1479 // CHECK2-NEXT: ret void
1480 // CHECK2: worker.exit:
1481 // CHECK2-NEXT: ret void
1482 //
1483 //
1484 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined
1485 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
1486 // CHECK2-NEXT: entry:
1487 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1488 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1489 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1490 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
1491 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1492 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1493 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1494 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
1495 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1496 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
1497 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
1498 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1499 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1500 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
1501 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
1502 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
1503 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1504 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1505 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
1506 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
1507 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
1508 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
1509 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
1510 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1511 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
1512 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1513 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
1514 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
1515 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4
1516 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1517 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
1518 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1519 // CHECK2: omp.precond.then:
1520 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
1521 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1522 // CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
1523 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1524 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1525 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1526 // CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1527 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
1528 // CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
1529 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1530 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1531 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
1532 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1533 // CHECK2: cond.true:
1534 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1535 // CHECK2-NEXT: br label [[COND_END:%.*]]
1536 // CHECK2: cond.false:
1537 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1538 // CHECK2-NEXT: br label [[COND_END]]
1539 // CHECK2: cond.end:
1540 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
1541 // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
1542 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
1543 // CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
1544 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1545 // CHECK2: omp.inner.for.cond:
1546 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
1547 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]]
1548 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
1549 // CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
1550 // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1551 // CHECK2: omp.inner.for.body:
1552 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
1553 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
1554 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]]
1555 // CHECK2-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP25]]
1556 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP25]]
1557 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
1558 // CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr
1559 // CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP25]]
1560 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
1561 // CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr
1562 // CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP25]]
1563 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
1564 // CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr
1565 // CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP25]]
1566 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
1567 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP25]]
1568 // CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP25]]
1569 // CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP25]]
1570 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP25]]
1571 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1572 // CHECK2: omp.inner.for.inc:
1573 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
1574 // CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]]
1575 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
1576 // CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
1577 // CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
1578 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]]
1579 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
1580 // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
1581 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
1582 // CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]]
1583 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
1584 // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
1585 // CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
1586 // CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]]
1587 // CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]]
1588 // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
1589 // CHECK2: cond.true10:
1590 // CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]]
1591 // CHECK2-NEXT: br label [[COND_END12:%.*]]
1592 // CHECK2: cond.false11:
1593 // CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
1594 // CHECK2-NEXT: br label [[COND_END12]]
1595 // CHECK2: cond.end12:
1596 // CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ]
1597 // CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
1598 // CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
1599 // CHECK2-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
1600 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
1601 // CHECK2: omp.inner.for.end:
1602 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1603 // CHECK2: omp.loop.exit:
1604 // CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1605 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
1606 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP39]])
1607 // CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1608 // CHECK2-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
1609 // CHECK2-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1610 // CHECK2: .omp.final.then:
1611 // CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1612 // CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP42]], 0
1613 // CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
1614 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1
1615 // CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]]
1616 // CHECK2-NEXT: store i32 [[ADD16]], ptr [[I3]], align 4
1617 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
1618 // CHECK2: .omp.final.done:
1619 // CHECK2-NEXT: br label [[OMP_PRECOND_END]]
1620 // CHECK2: omp.precond.end:
1621 // CHECK2-NEXT: ret void
1622 //
1623 //
1624 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined_omp_outlined
1625 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
1626 // CHECK2-NEXT: entry:
1627 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1628 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1629 // CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
1630 // CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
1631 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1632 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
1633 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1634 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1635 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1636 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
1637 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1638 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1639 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1640 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1641 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1642 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
1643 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1644 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1645 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
1646 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
1647 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
1648 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
1649 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
1650 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
1651 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
1652 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1653 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
1654 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1655 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
1656 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
1657 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4
1658 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1659 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
1660 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1661 // CHECK2: omp.precond.then:
1662 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1663 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
1664 // CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
1665 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
1666 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
1667 // CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
1668 // CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
1669 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1670 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1671 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1672 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
1673 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
1674 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1675 // CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
1676 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1677 // CHECK2: omp.inner.for.cond:
1678 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
1679 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
1680 // CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]]
1681 // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1682 // CHECK2: omp.inner.for.body:
1683 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1684 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
1685 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1686 // CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
1687 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
1688 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP13]]
1689 // CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
1690 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32
1691 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1
1692 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16
1693 // CHECK2-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
1694 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1695 // CHECK2: omp.body.continue:
1696 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1697 // CHECK2: omp.inner.for.inc:
1698 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1699 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]]
1700 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
1701 // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
1702 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
1703 // CHECK2: omp.inner.for.end:
1704 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1705 // CHECK2: omp.loop.exit:
1706 // CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1707 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
1708 // CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
1709 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1710 // CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
1711 // CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1712 // CHECK2: .omp.final.then:
1713 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1714 // CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0
1715 // CHECK2-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
1716 // CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1
1717 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
1718 // CHECK2-NEXT: store i32 [[ADD11]], ptr [[I3]], align 4
1719 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
1720 // CHECK2: .omp.final.done:
1721 // CHECK2-NEXT: br label [[OMP_PRECOND_END]]
1722 // CHECK2: omp.precond.end:
1723 // CHECK2-NEXT: ret void
1724 //
1725 //
1726 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37
1727 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
1728 // CHECK2-NEXT: entry:
1729 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
1730 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
1731 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1732 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1733 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
1734 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
1735 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
1736 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment, ptr [[DYN_PTR]])
1737 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1738 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1739 // CHECK2: user_code.entry:
1740 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
1741 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
1742 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
1743 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
1744 // CHECK2-NEXT: call void @__kmpc_target_deinit()
1745 // CHECK2-NEXT: ret void
1746 // CHECK2: worker.exit:
1747 // CHECK2-NEXT: ret void
1748 //
1749 //
1750 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined
1751 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
1752 // CHECK2-NEXT: entry:
1753 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1754 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1755 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
1756 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1757 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1758 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
1759 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
1760 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1761 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1762 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1763 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
1764 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1765 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1766 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
1767 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
1768 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
1769 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
1770 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1771 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1772 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1773 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1774 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
1775 // CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
1776 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1777 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
1778 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1779 // CHECK2: cond.true:
1780 // CHECK2-NEXT: br label [[COND_END:%.*]]
1781 // CHECK2: cond.false:
1782 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1783 // CHECK2-NEXT: br label [[COND_END]]
1784 // CHECK2: cond.end:
1785 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
1786 // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
1787 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
1788 // CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
1789 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1790 // CHECK2: omp.inner.for.cond:
1791 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]]
1792 // CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
1793 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1794 // CHECK2: omp.inner.for.body:
1795 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
1796 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
1797 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
1798 // CHECK2-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr
1799 // CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP31]]
1800 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
1801 // CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr
1802 // CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP31]]
1803 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
1804 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP31]]
1805 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3), !llvm.access.group [[ACC_GRP31]]
1806 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1807 // CHECK2: omp.inner.for.inc:
1808 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
1809 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]]
1810 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
1811 // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
1812 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
1813 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]]
1814 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
1815 // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
1816 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
1817 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]]
1818 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
1819 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
1820 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
1821 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
1822 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
1823 // CHECK2: cond.true5:
1824 // CHECK2-NEXT: br label [[COND_END7:%.*]]
1825 // CHECK2: cond.false6:
1826 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
1827 // CHECK2-NEXT: br label [[COND_END7]]
1828 // CHECK2: cond.end7:
1829 // CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
1830 // CHECK2-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
1831 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
1832 // CHECK2-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
1833 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
1834 // CHECK2: omp.inner.for.end:
1835 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1836 // CHECK2: omp.loop.exit:
1837 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
1838 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1839 // CHECK2-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
1840 // CHECK2-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1841 // CHECK2: .omp.final.then:
1842 // CHECK2-NEXT: store i32 10, ptr [[I]], align 4
1843 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
1844 // CHECK2: .omp.final.done:
1845 // CHECK2-NEXT: ret void
1846 //
1847 //
1848 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined_omp_outlined
1849 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
1850 // CHECK2-NEXT: entry:
1851 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1852 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1853 // CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
1854 // CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
1855 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
1856 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1857 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1858 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1859 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1860 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1861 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1862 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1863 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1864 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1865 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
1866 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
1867 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
1868 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
1869 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1870 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
1871 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
1872 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
1873 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
1874 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
1875 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1876 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1877 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1878 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
1879 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
1880 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1881 // CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
1882 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1883 // CHECK2: omp.inner.for.cond:
1884 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]]
1885 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP34]]
1886 // CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
1887 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1888 // CHECK2: omp.inner.for.body:
1889 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
1890 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
1891 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1892 // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
1893 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
1894 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]]
1895 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
1896 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1
1897 // CHECK2-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
1898 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1899 // CHECK2: omp.body.continue:
1900 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1901 // CHECK2: omp.inner.for.inc:
1902 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
1903 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]]
1904 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
1905 // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
1906 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
1907 // CHECK2: omp.inner.for.end:
1908 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1909 // CHECK2: omp.loop.exit:
1910 // CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
1911 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
1912 // CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
1913 // CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1914 // CHECK2: .omp.final.then:
1915 // CHECK2-NEXT: store i32 10, ptr [[I]], align 4
1916 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
1917 // CHECK2: .omp.final.done:
1918 // CHECK2-NEXT: ret void
1919 //
1920 //
1921 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
1922 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR0]] {
1923 // CHECK2-NEXT: entry:
1924 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
1925 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
1926 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
1927 // CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
1928 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1929 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1930 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
1931 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
1932 // CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
1933 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
1934 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]])
1935 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1936 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1937 // CHECK2: user_code.entry:
1938 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
1939 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
1940 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
1941 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
1942 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
1943 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
1944 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
1945 // CHECK2-NEXT: call void @__kmpc_target_deinit()
1946 // CHECK2-NEXT: ret void
1947 // CHECK2: worker.exit:
1948 // CHECK2-NEXT: ret void
1949 //
1950 //
1951 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
1952 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] {
1953 // CHECK2-NEXT: entry:
1954 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
1955 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
1956 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
1957 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
1958 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1959 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1960 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
1961 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
1962 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
1963 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1964 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1965 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
1966 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1967 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
1968 // CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
1969 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
1970 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
1971 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
1972 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
1973 // CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
1974 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
1975 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
1976 // CHECK2-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
1977 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1978 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1979 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1980 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
1981 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
1982 // CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
1983 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1984 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
1985 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1986 // CHECK2: cond.true:
1987 // CHECK2-NEXT: br label [[COND_END:%.*]]
1988 // CHECK2: cond.false:
1989 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1990 // CHECK2-NEXT: br label [[COND_END]]
1991 // CHECK2: cond.end:
1992 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
1993 // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
1994 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
1995 // CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
1996 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1997 // CHECK2: omp.inner.for.cond:
1998 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]]
1999 // CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100
2000 // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2001 // CHECK2: omp.inner.for.body:
2002 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
2003 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
2004 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP37]]
2005 // CHECK2-NEXT: store i32 [[TMP9]], ptr [[F_CASTED]], align 4, !llvm.access.group [[ACC_GRP37]]
2006 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[F_CASTED]], align 4, !llvm.access.group [[ACC_GRP37]]
2007 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
2008 // CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to ptr
2009 // CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP37]]
2010 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
2011 // CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr
2012 // CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP37]]
2013 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
2014 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP37]]
2015 // CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
2016 // CHECK2-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr
2017 // CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP37]]
2018 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP37]]
2019 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2020 // CHECK2: omp.inner.for.inc:
2021 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]]
2022 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]]
2023 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
2024 // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]]
2025 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
2026 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]]
2027 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
2028 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
2029 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
2030 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]]
2031 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
2032 // CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
2033 // CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
2034 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], 99
2035 // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
2036 // CHECK2: cond.true6:
2037 // CHECK2-NEXT: br label [[COND_END8:%.*]]
2038 // CHECK2: cond.false7:
2039 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
2040 // CHECK2-NEXT: br label [[COND_END8]]
2041 // CHECK2: cond.end8:
2042 // CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP25]], [[COND_FALSE7]] ]
2043 // CHECK2-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
2044 // CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
2045 // CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]]
2046 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]]
2047 // CHECK2: omp.inner.for.end:
2048 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
2049 // CHECK2: omp.loop.exit:
2050 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
2051 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
2052 // CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
2053 // CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
2054 // CHECK2: .omp.final.then:
2055 // CHECK2-NEXT: store i32 10, ptr [[I]], align 4
2056 // CHECK2-NEXT: store i32 10, ptr [[J]], align 4
2057 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
2058 // CHECK2: .omp.final.done:
2059 // CHECK2-NEXT: ret void
2060 //
2061 //
2062 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined_omp_outlined
2063 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] {
2064 // CHECK2-NEXT: entry:
2065 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
2066 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
2067 // CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
2068 // CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
2069 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
2070 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
2071 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2072 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
2073 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
2074 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
2075 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
2076 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2077 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2078 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
2079 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
2080 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
2081 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
2082 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
2083 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
2084 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
2085 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
2086 // CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
2087 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
2088 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
2089 // CHECK2-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
2090 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
2091 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
2092 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
2093 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
2094 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
2095 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2096 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
2097 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
2098 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
2099 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2100 // CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
2101 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2102 // CHECK2: omp.inner.for.cond:
2103 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]]
2104 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP40]]
2105 // CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
2106 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2107 // CHECK2: omp.inner.for.body:
2108 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
2109 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
2110 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
2111 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2112 // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]]
2113 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
2114 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
2115 // CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10
2116 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10
2117 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]]
2118 // CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1
2119 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
2120 // CHECK2-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP40]]
2121 // CHECK2-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP40]]
2122 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]]
2123 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP40]]
2124 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]]
2125 // CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
2126 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]]
2127 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP40]]
2128 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]]
2129 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]]
2130 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP15]]
2131 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP40]]
2132 // CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP16]]
2133 // CHECK2-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP40]]
2134 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2135 // CHECK2: omp.body.continue:
2136 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2137 // CHECK2: omp.inner.for.inc:
2138 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
2139 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]]
2140 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
2141 // CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
2142 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]]
2143 // CHECK2: omp.inner.for.end:
2144 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
2145 // CHECK2: omp.loop.exit:
2146 // CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
2147 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
2148 // CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
2149 // CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
2150 // CHECK2: .omp.final.then:
2151 // CHECK2-NEXT: store i32 10, ptr [[I]], align 4
2152 // CHECK2-NEXT: store i32 10, ptr [[J]], align 4
2153 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
2154 // CHECK2: .omp.final.done:
2155 // CHECK2-NEXT: ret void
2156 //
2157