1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
2 // REQUIRES: amdgpu-registered-target
3
4 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
5 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU
6
7 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
8
9 // Check same results after serialization round-trip
10 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
11 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
12
13
14 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-assume-no-nested-parallelism -DNESTED -emit-llvm-bc %s -o %t-ppc-host.bc
15 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-assume-no-nested-parallelism -DNESTED -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU-NESTED
16
17 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-assume-no-nested-parallelism -DNESTED -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-NESTED
18
19 // Check same results after serialization round-trip
20 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-assume-no-nested-parallelism -DNESTED -emit-pch -o %t %s
21 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -fopenmp-assume-no-nested-parallelism -DNESTED -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH-NESTED
22
23 // expected-no-diagnostics
24
25 #ifndef NESTED
26 extern int omp_get_num_teams(void);
27 #endif
28
29 #ifndef HEADER
30 #define HEADER
31 extern int foo(int i);
32
33 int N = 100000;
main()34 int main()
35 {
36 int a[N];
37 int b[N];
38
39 #ifndef NESTED
40 // Should be transformed into 'target teams distribute parallel for'
41 #pragma omp target teams loop
42 for (int j = 0; j != N; j++)
43 a[j]=b[j];
44
45 // Should be transformed into 'target teams distribute parallel for'
46 #pragma omp target teams loop collapse(2)
47 for (int i = 0; i < N; i++) {
48 for (int j = 0; j < N; j++) {
49 a[i] = b[i] * N + j;
50 }
51 }
52
53 int nt = 0;
54 // Should be transformed into 'target teams distribute parallel for'
55 #pragma omp target teams loop num_teams(32)
56 for (int i=0; i < N; i++) {
57 if (!nt) nt = omp_get_num_teams();
58 for (int j=0; j < N; j++)
59 a[j] = b[j] * N + nt;
60 }
61 #else
62 // Should be transformed into 'target teams distribute parallel for'
63 // even with function call because of assume-no-nested-parallelism.
64 #pragma omp target teams loop collapse(2)
65 for (int i = 0; i < N; i++) {
66 for (int j = 0; j < N; j++) {
67 a[i] = b[i] * N + foo(j);
68 }
69 }
70 #endif
71 return 0;
72 }
73 #endif
74 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
75 // IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
76 // IR-GPU-NEXT: entry:
77 // IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
78 // IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
79 // IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
80 // IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
81 // IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
82 // IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
83 // IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
84 // IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
85 // IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
86 // IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
87 // IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
88 // IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
89 // IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
90 // IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
91 // IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
92 // IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
93 // IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
94 // IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
95 // IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
96 // IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
97 // IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
98 // IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
99 // IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
100 // IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
101 // IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
102 // IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
103 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
104 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
105 // IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment to ptr), ptr [[DYN_PTR]])
106 // IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
107 // IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
108 // IR-GPU: user_code.entry:
109 // IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
110 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
111 // IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
112 // IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
113 // IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
114 // IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
115 // IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]]
116 // IR-GPU-NEXT: call void @__kmpc_target_deinit()
117 // IR-GPU-NEXT: ret void
118 // IR-GPU: worker.exit:
119 // IR-GPU-NEXT: ret void
120 //
121 //
122 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined
123 // IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] {
124 // IR-GPU-NEXT: entry:
125 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
126 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
127 // IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
128 // IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
129 // IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
130 // IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
131 // IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
132 // IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
133 // IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
134 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
135 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
136 // IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
137 // IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
138 // IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
139 // IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
140 // IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
141 // IR-GPU-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5)
142 // IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
143 // IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5)
144 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
145 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
146 // IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
147 // IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
148 // IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
149 // IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
150 // IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
151 // IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
152 // IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
153 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
154 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
155 // IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
156 // IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
157 // IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
158 // IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
159 // IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
160 // IR-GPU-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr
161 // IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
162 // IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
163 // IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
164 // IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
165 // IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
166 // IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
167 // IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
168 // IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
169 // IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
170 // IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
171 // IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
172 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
173 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
174 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
175 // IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
176 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
177 // IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
178 // IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
179 // IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
180 // IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
181 // IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
182 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
183 // IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
184 // IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
185 // IR-GPU: omp.precond.then:
186 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
187 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
188 // IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
189 // IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
190 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
191 // IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
192 // IR-GPU-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
193 // IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
194 // IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
195 // IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
196 // IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
197 // IR-GPU-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
198 // IR-GPU-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
199 // IR-GPU: cond.true:
200 // IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
201 // IR-GPU-NEXT: br label [[COND_END:%.*]]
202 // IR-GPU: cond.false:
203 // IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
204 // IR-GPU-NEXT: br label [[COND_END]]
205 // IR-GPU: cond.end:
206 // IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
207 // IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
208 // IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
209 // IR-GPU-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
210 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
211 // IR-GPU: omp.inner.for.cond:
212 // IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
213 // IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
214 // IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
215 // IR-GPU-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
216 // IR-GPU-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
217 // IR-GPU: omp.inner.for.body:
218 // IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
219 // IR-GPU-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
220 // IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
221 // IR-GPU-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
222 // IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
223 // IR-GPU-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4
224 // IR-GPU-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
225 // IR-GPU-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
226 // IR-GPU-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr
227 // IR-GPU-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
228 // IR-GPU-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
229 // IR-GPU-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr
230 // IR-GPU-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
231 // IR-GPU-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
232 // IR-GPU-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to ptr
233 // IR-GPU-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8
234 // IR-GPU-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
235 // IR-GPU-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP0]] to ptr
236 // IR-GPU-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
237 // IR-GPU-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
238 // IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 8
239 // IR-GPU-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
240 // IR-GPU-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP2]] to ptr
241 // IR-GPU-NEXT: store ptr [[TMP33]], ptr [[TMP32]], align 8
242 // IR-GPU-NEXT: [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
243 // IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 8
244 // IR-GPU-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
245 // IR-GPU-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
246 // IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7)
247 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
248 // IR-GPU: omp.inner.for.inc:
249 // IR-GPU-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
250 // IR-GPU-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
251 // IR-GPU-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
252 // IR-GPU-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4
253 // IR-GPU-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
254 // IR-GPU-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
255 // IR-GPU-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]]
256 // IR-GPU-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
257 // IR-GPU-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
258 // IR-GPU-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
259 // IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP41]], [[TMP42]]
260 // IR-GPU-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
261 // IR-GPU-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
262 // IR-GPU-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
263 // IR-GPU-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]]
264 // IR-GPU-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
265 // IR-GPU: cond.true12:
266 // IR-GPU-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
267 // IR-GPU-NEXT: br label [[COND_END14:%.*]]
268 // IR-GPU: cond.false13:
269 // IR-GPU-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
270 // IR-GPU-NEXT: br label [[COND_END14]]
271 // IR-GPU: cond.end14:
272 // IR-GPU-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE12]] ], [ [[TMP46]], [[COND_FALSE13]] ]
273 // IR-GPU-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
274 // IR-GPU-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
275 // IR-GPU-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV_ASCAST]], align 4
276 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
277 // IR-GPU: omp.inner.for.end:
278 // IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
279 // IR-GPU: omp.loop.exit:
280 // IR-GPU-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
281 // IR-GPU-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4
282 // IR-GPU-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP49]])
283 // IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
284 // IR-GPU: omp.precond.end:
285 // IR-GPU-NEXT: ret void
286 //
287 //
288 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined_omp_outlined
289 // IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
290 // IR-GPU-NEXT: entry:
291 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
292 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
293 // IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
294 // IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
295 // IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
296 // IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
297 // IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
298 // IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
299 // IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
300 // IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
301 // IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
302 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
303 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
304 // IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
305 // IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
306 // IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
307 // IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
308 // IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
309 // IR-GPU-NEXT: [[J6:%.*]] = alloca i32, align 4, addrspace(5)
310 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
311 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
312 // IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
313 // IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
314 // IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
315 // IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
316 // IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
317 // IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
318 // IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
319 // IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
320 // IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
321 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
322 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
323 // IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
324 // IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
325 // IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
326 // IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
327 // IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
328 // IR-GPU-NEXT: [[J6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J6]] to ptr
329 // IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
330 // IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
331 // IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
332 // IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
333 // IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
334 // IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
335 // IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
336 // IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
337 // IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
338 // IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
339 // IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
340 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
341 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
342 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
343 // IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
344 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
345 // IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
346 // IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
347 // IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
348 // IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
349 // IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
350 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
351 // IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
352 // IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
353 // IR-GPU: omp.precond.then:
354 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
355 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
356 // IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 4
357 // IR-GPU-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
358 // IR-GPU-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
359 // IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
360 // IR-GPU-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
361 // IR-GPU-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
362 // IR-GPU-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB_ASCAST]], align 4
363 // IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
364 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
365 // IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
366 // IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
367 // IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
368 // IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
369 // IR-GPU-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4
370 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
371 // IR-GPU: omp.inner.for.cond:
372 // IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
373 // IR-GPU-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64
374 // IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
375 // IR-GPU-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP14]]
376 // IR-GPU-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
377 // IR-GPU: omp.inner.for.body:
378 // IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
379 // IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
380 // IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
381 // IR-GPU-NEXT: store i32 [[ADD]], ptr [[J6_ASCAST]], align 4
382 // IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[J6_ASCAST]], align 4
383 // IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64
384 // IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
385 // IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
386 // IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[J6_ASCAST]], align 4
387 // IR-GPU-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64
388 // IR-GPU-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]]
389 // IR-GPU-NEXT: store i32 [[TMP17]], ptr [[ARRAYIDX10]], align 4
390 // IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
391 // IR-GPU: omp.body.continue:
392 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
393 // IR-GPU: omp.inner.for.inc:
394 // IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
395 // IR-GPU-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
396 // IR-GPU-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
397 // IR-GPU-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV_ASCAST]], align 4
398 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
399 // IR-GPU: omp.inner.for.end:
400 // IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
401 // IR-GPU: omp.loop.exit:
402 // IR-GPU-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
403 // IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
404 // IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP22]])
405 // IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
406 // IR-GPU: omp.precond.end:
407 // IR-GPU-NEXT: ret void
408 //
409 //
410 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46
411 // IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
412 // IR-GPU-NEXT: entry:
413 // IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
414 // IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
415 // IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
416 // IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
417 // IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
418 // IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
419 // IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
420 // IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
421 // IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
422 // IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
423 // IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
424 // IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
425 // IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
426 // IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
427 // IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
428 // IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
429 // IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
430 // IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
431 // IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
432 // IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
433 // IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
434 // IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
435 // IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
436 // IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
437 // IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
438 // IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
439 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
440 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
441 // IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_kernel_environment to ptr), ptr [[DYN_PTR]])
442 // IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
443 // IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
444 // IR-GPU: user_code.entry:
445 // IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
446 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
447 // IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
448 // IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
449 // IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
450 // IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
451 // IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2]]
452 // IR-GPU-NEXT: call void @__kmpc_target_deinit()
453 // IR-GPU-NEXT: ret void
454 // IR-GPU: worker.exit:
455 // IR-GPU-NEXT: ret void
456 //
457 //
458 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined
459 // IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
460 // IR-GPU-NEXT: entry:
461 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
462 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
463 // IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
464 // IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
465 // IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
466 // IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
467 // IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
468 // IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
469 // IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
470 // IR-GPU-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
471 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
472 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5)
473 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5)
474 // IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
475 // IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
476 // IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8, addrspace(5)
477 // IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8, addrspace(5)
478 // IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
479 // IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
480 // IR-GPU-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
481 // IR-GPU-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
482 // IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
483 // IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5)
484 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
485 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
486 // IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
487 // IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
488 // IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
489 // IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
490 // IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
491 // IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
492 // IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
493 // IR-GPU-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
494 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
495 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr
496 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr
497 // IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
498 // IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
499 // IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
500 // IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
501 // IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
502 // IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
503 // IR-GPU-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
504 // IR-GPU-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
505 // IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
506 // IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
507 // IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
508 // IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
509 // IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
510 // IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
511 // IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
512 // IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
513 // IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
514 // IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
515 // IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
516 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
517 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
518 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
519 // IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
520 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
521 // IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
522 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
523 // IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
524 // IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
525 // IR-GPU-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
526 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
527 // IR-GPU-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
528 // IR-GPU-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
529 // IR-GPU-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
530 // IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
531 // IR-GPU-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
532 // IR-GPU-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
533 // IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
534 // IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
535 // IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
536 // IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
537 // IR-GPU-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
538 // IR-GPU: land.lhs.true:
539 // IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
540 // IR-GPU-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
541 // IR-GPU-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
542 // IR-GPU: omp.precond.then:
543 // IR-GPU-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
544 // IR-GPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
545 // IR-GPU-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
546 // IR-GPU-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
547 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
548 // IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
549 // IR-GPU-NEXT: [[CONV13:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64
550 // IR-GPU-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
551 // IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
552 // IR-GPU-NEXT: call void @__kmpc_distribute_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 [[CONV13]])
553 // IR-GPU-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
554 // IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
555 // IR-GPU-NEXT: [[CMP14:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
556 // IR-GPU-NEXT: br i1 [[CMP14]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
557 // IR-GPU: cond.true:
558 // IR-GPU-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
559 // IR-GPU-NEXT: br label [[COND_END:%.*]]
560 // IR-GPU: cond.false:
561 // IR-GPU-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
562 // IR-GPU-NEXT: br label [[COND_END]]
563 // IR-GPU: cond.end:
564 // IR-GPU-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
565 // IR-GPU-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
566 // IR-GPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
567 // IR-GPU-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV_ASCAST]], align 8
568 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
569 // IR-GPU: omp.inner.for.cond:
570 // IR-GPU-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
571 // IR-GPU-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
572 // IR-GPU-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP19]], 1
573 // IR-GPU-NEXT: [[CMP15:%.*]] = icmp slt i64 [[TMP18]], [[ADD]]
574 // IR-GPU-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
575 // IR-GPU: omp.inner.for.body:
576 // IR-GPU-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
577 // IR-GPU-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
578 // IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
579 // IR-GPU-NEXT: store i32 [[TMP22]], ptr [[N_CASTED_ASCAST]], align 4
580 // IR-GPU-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
581 // IR-GPU-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
582 // IR-GPU-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP20]] to ptr
583 // IR-GPU-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
584 // IR-GPU-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
585 // IR-GPU-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP21]] to ptr
586 // IR-GPU-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8
587 // IR-GPU-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
588 // IR-GPU-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP23]] to ptr
589 // IR-GPU-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8
590 // IR-GPU-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
591 // IR-GPU-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP0]] to ptr
592 // IR-GPU-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8
593 // IR-GPU-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
594 // IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP32]], align 8
595 // IR-GPU-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
596 // IR-GPU-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP2]] to ptr
597 // IR-GPU-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8
598 // IR-GPU-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
599 // IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP35]], align 8
600 // IR-GPU-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
601 // IR-GPU-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
602 // IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7)
603 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
604 // IR-GPU: omp.inner.for.inc:
605 // IR-GPU-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
606 // IR-GPU-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
607 // IR-GPU-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]]
608 // IR-GPU-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_IV_ASCAST]], align 8
609 // IR-GPU-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
610 // IR-GPU-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
611 // IR-GPU-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP40]], [[TMP41]]
612 // IR-GPU-NEXT: store i64 [[ADD17]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
613 // IR-GPU-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
614 // IR-GPU-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
615 // IR-GPU-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP42]], [[TMP43]]
616 // IR-GPU-NEXT: store i64 [[ADD18]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
617 // IR-GPU-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
618 // IR-GPU-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
619 // IR-GPU-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[TMP44]], [[TMP45]]
620 // IR-GPU-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]]
621 // IR-GPU: cond.true20:
622 // IR-GPU-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
623 // IR-GPU-NEXT: br label [[COND_END22:%.*]]
624 // IR-GPU: cond.false21:
625 // IR-GPU-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
626 // IR-GPU-NEXT: br label [[COND_END22]]
627 // IR-GPU: cond.end22:
628 // IR-GPU-NEXT: [[COND23:%.*]] = phi i64 [ [[TMP46]], [[COND_TRUE20]] ], [ [[TMP47]], [[COND_FALSE21]] ]
629 // IR-GPU-NEXT: store i64 [[COND23]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
630 // IR-GPU-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
631 // IR-GPU-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_ASCAST]], align 8
632 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
633 // IR-GPU: omp.inner.for.end:
634 // IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
635 // IR-GPU: omp.loop.exit:
636 // IR-GPU-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
637 // IR-GPU-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4
638 // IR-GPU-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP50]])
639 // IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
640 // IR-GPU: omp.precond.end:
641 // IR-GPU-NEXT: ret void
642 //
643 //
644 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined_omp_outlined
645 // IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
646 // IR-GPU-NEXT: entry:
647 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
648 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
649 // IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
650 // IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
651 // IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
652 // IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
653 // IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
654 // IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
655 // IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
656 // IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
657 // IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
658 // IR-GPU-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
659 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
660 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5)
661 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5)
662 // IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
663 // IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
664 // IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8, addrspace(5)
665 // IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8, addrspace(5)
666 // IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
667 // IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
668 // IR-GPU-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
669 // IR-GPU-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
670 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
671 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
672 // IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
673 // IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
674 // IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
675 // IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
676 // IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
677 // IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
678 // IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
679 // IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
680 // IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
681 // IR-GPU-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
682 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
683 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr
684 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr
685 // IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
686 // IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
687 // IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
688 // IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
689 // IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
690 // IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
691 // IR-GPU-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
692 // IR-GPU-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
693 // IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
694 // IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
695 // IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
696 // IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
697 // IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
698 // IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
699 // IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
700 // IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
701 // IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
702 // IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
703 // IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
704 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
705 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
706 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
707 // IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
708 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
709 // IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
710 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
711 // IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
712 // IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
713 // IR-GPU-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
714 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
715 // IR-GPU-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
716 // IR-GPU-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
717 // IR-GPU-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
718 // IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
719 // IR-GPU-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
720 // IR-GPU-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
721 // IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
722 // IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
723 // IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
724 // IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
725 // IR-GPU-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
726 // IR-GPU: land.lhs.true:
727 // IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
728 // IR-GPU-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
729 // IR-GPU-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
730 // IR-GPU: omp.precond.then:
731 // IR-GPU-NEXT: store i64 0, ptr [[DOTOMP_LB_ASCAST]], align 8
732 // IR-GPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
733 // IR-GPU-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB_ASCAST]], align 8
734 // IR-GPU-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
735 // IR-GPU-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
736 // IR-GPU-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB_ASCAST]], align 8
737 // IR-GPU-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB_ASCAST]], align 8
738 // IR-GPU-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
739 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
740 // IR-GPU-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
741 // IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
742 // IR-GPU-NEXT: call void @__kmpc_for_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 1)
743 // IR-GPU-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB_ASCAST]], align 8
744 // IR-GPU-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV_ASCAST]], align 8
745 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
746 // IR-GPU: omp.inner.for.cond:
747 // IR-GPU-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
748 // IR-GPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
749 // IR-GPU-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]]
750 // IR-GPU-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
751 // IR-GPU: omp.inner.for.body:
752 // IR-GPU-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
753 // IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
754 // IR-GPU-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP19]], 0
755 // IR-GPU-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
756 // IR-GPU-NEXT: [[MUL16:%.*]] = mul nsw i32 1, [[DIV15]]
757 // IR-GPU-NEXT: [[CONV17:%.*]] = sext i32 [[MUL16]] to i64
758 // IR-GPU-NEXT: [[DIV18:%.*]] = sdiv i64 [[TMP18]], [[CONV17]]
759 // IR-GPU-NEXT: [[MUL19:%.*]] = mul nsw i64 [[DIV18]], 1
760 // IR-GPU-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL19]]
761 // IR-GPU-NEXT: [[CONV20:%.*]] = trunc i64 [[ADD]] to i32
762 // IR-GPU-NEXT: store i32 [[CONV20]], ptr [[I11_ASCAST]], align 4
763 // IR-GPU-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
764 // IR-GPU-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
765 // IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
766 // IR-GPU-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP22]], 0
767 // IR-GPU-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1
768 // IR-GPU-NEXT: [[MUL23:%.*]] = mul nsw i32 1, [[DIV22]]
769 // IR-GPU-NEXT: [[CONV24:%.*]] = sext i32 [[MUL23]] to i64
770 // IR-GPU-NEXT: [[DIV25:%.*]] = sdiv i64 [[TMP21]], [[CONV24]]
771 // IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
772 // IR-GPU-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP23]], 0
773 // IR-GPU-NEXT: [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
774 // IR-GPU-NEXT: [[MUL28:%.*]] = mul nsw i32 1, [[DIV27]]
775 // IR-GPU-NEXT: [[CONV29:%.*]] = sext i32 [[MUL28]] to i64
776 // IR-GPU-NEXT: [[MUL30:%.*]] = mul nsw i64 [[DIV25]], [[CONV29]]
777 // IR-GPU-NEXT: [[SUB31:%.*]] = sub nsw i64 [[TMP20]], [[MUL30]]
778 // IR-GPU-NEXT: [[MUL32:%.*]] = mul nsw i64 [[SUB31]], 1
779 // IR-GPU-NEXT: [[ADD33:%.*]] = add nsw i64 0, [[MUL32]]
780 // IR-GPU-NEXT: [[CONV34:%.*]] = trunc i64 [[ADD33]] to i32
781 // IR-GPU-NEXT: store i32 [[CONV34]], ptr [[J12_ASCAST]], align 4
782 // IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
783 // IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64
784 // IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
785 // IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
786 // IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
787 // IR-GPU-NEXT: [[MUL35:%.*]] = mul nsw i32 [[TMP25]], [[TMP26]]
788 // IR-GPU-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12_ASCAST]], align 4
789 // IR-GPU-NEXT: [[ADD36:%.*]] = add nsw i32 [[MUL35]], [[TMP27]]
790 // IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
791 // IR-GPU-NEXT: [[IDXPROM37:%.*]] = sext i32 [[TMP28]] to i64
792 // IR-GPU-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM37]]
793 // IR-GPU-NEXT: store i32 [[ADD36]], ptr [[ARRAYIDX38]], align 4
794 // IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
795 // IR-GPU: omp.body.continue:
796 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
797 // IR-GPU: omp.inner.for.inc:
798 // IR-GPU-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
799 // IR-GPU-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
800 // IR-GPU-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP29]], [[TMP30]]
801 // IR-GPU-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV_ASCAST]], align 8
802 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
803 // IR-GPU: omp.inner.for.end:
804 // IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
805 // IR-GPU: omp.loop.exit:
806 // IR-GPU-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
807 // IR-GPU-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
808 // IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP32]])
809 // IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
810 // IR-GPU: omp.precond.end:
811 // IR-GPU-NEXT: ret void
812 //
813 //
814 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55
815 // IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR4:[0-9]+]] {
816 // IR-GPU-NEXT: entry:
817 // IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
818 // IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
819 // IR-GPU-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
820 // IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
821 // IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
822 // IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
823 // IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
824 // IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
825 // IR-GPU-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
826 // IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
827 // IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
828 // IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
829 // IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
830 // IR-GPU-NEXT: [[NT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_ADDR]] to ptr
831 // IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
832 // IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
833 // IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
834 // IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
835 // IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
836 // IR-GPU-NEXT: [[NT_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_CASTED]] to ptr
837 // IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
838 // IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
839 // IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
840 // IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
841 // IR-GPU-NEXT: store i64 [[NT]], ptr [[NT_ADDR_ASCAST]], align 8
842 // IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
843 // IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
844 // IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
845 // IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
846 // IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
847 // IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
848 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
849 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
850 // IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_kernel_environment to ptr), ptr [[DYN_PTR]])
851 // IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
852 // IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
853 // IR-GPU: user_code.entry:
854 // IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
855 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
856 // IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
857 // IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
858 // IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4
859 // IR-GPU-NEXT: store i32 [[TMP8]], ptr [[NT_CASTED_ASCAST]], align 4
860 // IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[NT_CASTED_ASCAST]], align 8
861 // IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
862 // IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
863 // IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2]]
864 // IR-GPU-NEXT: call void @__kmpc_target_deinit()
865 // IR-GPU-NEXT: ret void
866 // IR-GPU: worker.exit:
867 // IR-GPU-NEXT: ret void
868 //
869 //
870 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined
871 // IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
872 // IR-GPU-NEXT: entry:
873 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
874 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
875 // IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
876 // IR-GPU-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
877 // IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
878 // IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
879 // IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
880 // IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
881 // IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
882 // IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
883 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
884 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
885 // IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
886 // IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
887 // IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
888 // IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
889 // IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
890 // IR-GPU-NEXT: [[I5:%.*]] = alloca i32, align 4, addrspace(5)
891 // IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
892 // IR-GPU-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
893 // IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x ptr], align 8, addrspace(5)
894 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
895 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
896 // IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
897 // IR-GPU-NEXT: [[NT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_ADDR]] to ptr
898 // IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
899 // IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
900 // IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
901 // IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
902 // IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
903 // IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
904 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
905 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
906 // IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
907 // IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
908 // IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
909 // IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
910 // IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
911 // IR-GPU-NEXT: [[I5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I5]] to ptr
912 // IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
913 // IR-GPU-NEXT: [[NT_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_CASTED]] to ptr
914 // IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
915 // IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
916 // IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
917 // IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
918 // IR-GPU-NEXT: store i64 [[NT]], ptr [[NT_ADDR_ASCAST]], align 8
919 // IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
920 // IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
921 // IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
922 // IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
923 // IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
924 // IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
925 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
926 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
927 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
928 // IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
929 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
930 // IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
931 // IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
932 // IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
933 // IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
934 // IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
935 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
936 // IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
937 // IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
938 // IR-GPU: omp.precond.then:
939 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
940 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
941 // IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
942 // IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
943 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
944 // IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
945 // IR-GPU-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
946 // IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
947 // IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
948 // IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
949 // IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
950 // IR-GPU-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
951 // IR-GPU-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
952 // IR-GPU: cond.true:
953 // IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
954 // IR-GPU-NEXT: br label [[COND_END:%.*]]
955 // IR-GPU: cond.false:
956 // IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
957 // IR-GPU-NEXT: br label [[COND_END]]
958 // IR-GPU: cond.end:
959 // IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
960 // IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
961 // IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
962 // IR-GPU-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
963 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
964 // IR-GPU: omp.inner.for.cond:
965 // IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
966 // IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
967 // IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
968 // IR-GPU-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
969 // IR-GPU-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
970 // IR-GPU: omp.inner.for.body:
971 // IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
972 // IR-GPU-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
973 // IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
974 // IR-GPU-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
975 // IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
976 // IR-GPU-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4
977 // IR-GPU-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
978 // IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4
979 // IR-GPU-NEXT: store i32 [[TMP23]], ptr [[NT_CASTED_ASCAST]], align 4
980 // IR-GPU-NEXT: [[TMP24:%.*]] = load i64, ptr [[NT_CASTED_ASCAST]], align 8
981 // IR-GPU-NEXT: [[TMP25:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
982 // IR-GPU-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP18]] to ptr
983 // IR-GPU-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
984 // IR-GPU-NEXT: [[TMP27:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
985 // IR-GPU-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP20]] to ptr
986 // IR-GPU-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8
987 // IR-GPU-NEXT: [[TMP29:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
988 // IR-GPU-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP22]] to ptr
989 // IR-GPU-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
990 // IR-GPU-NEXT: [[TMP31:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
991 // IR-GPU-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP24]] to ptr
992 // IR-GPU-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 8
993 // IR-GPU-NEXT: [[TMP33:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
994 // IR-GPU-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP0]] to ptr
995 // IR-GPU-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8
996 // IR-GPU-NEXT: [[TMP35:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
997 // IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP35]], align 8
998 // IR-GPU-NEXT: [[TMP36:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
999 // IR-GPU-NEXT: [[TMP37:%.*]] = inttoptr i64 [[TMP2]] to ptr
1000 // IR-GPU-NEXT: store ptr [[TMP37]], ptr [[TMP36]], align 8
1001 // IR-GPU-NEXT: [[TMP38:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 7
1002 // IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP38]], align 8
1003 // IR-GPU-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
1004 // IR-GPU-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4
1005 // IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP40]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 8)
1006 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1007 // IR-GPU: omp.inner.for.inc:
1008 // IR-GPU-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
1009 // IR-GPU-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
1010 // IR-GPU-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP41]], [[TMP42]]
1011 // IR-GPU-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4
1012 // IR-GPU-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
1013 // IR-GPU-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
1014 // IR-GPU-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP43]], [[TMP44]]
1015 // IR-GPU-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
1016 // IR-GPU-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
1017 // IR-GPU-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
1018 // IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP45]], [[TMP46]]
1019 // IR-GPU-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
1020 // IR-GPU-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
1021 // IR-GPU-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
1022 // IR-GPU-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP47]], [[TMP48]]
1023 // IR-GPU-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
1024 // IR-GPU: cond.true12:
1025 // IR-GPU-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
1026 // IR-GPU-NEXT: br label [[COND_END14:%.*]]
1027 // IR-GPU: cond.false13:
1028 // IR-GPU-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
1029 // IR-GPU-NEXT: br label [[COND_END14]]
1030 // IR-GPU: cond.end14:
1031 // IR-GPU-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP49]], [[COND_TRUE12]] ], [ [[TMP50]], [[COND_FALSE13]] ]
1032 // IR-GPU-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
1033 // IR-GPU-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
1034 // IR-GPU-NEXT: store i32 [[TMP51]], ptr [[DOTOMP_IV_ASCAST]], align 4
1035 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
1036 // IR-GPU: omp.inner.for.end:
1037 // IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1038 // IR-GPU: omp.loop.exit:
1039 // IR-GPU-NEXT: [[TMP52:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
1040 // IR-GPU-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP52]], align 4
1041 // IR-GPU-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP53]])
1042 // IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
1043 // IR-GPU: omp.precond.end:
1044 // IR-GPU-NEXT: ret void
1045 //
1046 //
1047 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined_omp_outlined
1048 // IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
1049 // IR-GPU-NEXT: entry:
1050 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1051 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1052 // IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
1053 // IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
1054 // IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
1055 // IR-GPU-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
1056 // IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
1057 // IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1058 // IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
1059 // IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1060 // IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
1061 // IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
1062 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
1063 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
1064 // IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
1065 // IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
1066 // IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
1067 // IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
1068 // IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
1069 // IR-GPU-NEXT: [[I6:%.*]] = alloca i32, align 4, addrspace(5)
1070 // IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
1071 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
1072 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
1073 // IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
1074 // IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
1075 // IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
1076 // IR-GPU-NEXT: [[NT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_ADDR]] to ptr
1077 // IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
1078 // IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
1079 // IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
1080 // IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
1081 // IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
1082 // IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
1083 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
1084 // IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
1085 // IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
1086 // IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
1087 // IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
1088 // IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
1089 // IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
1090 // IR-GPU-NEXT: [[I6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I6]] to ptr
1091 // IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
1092 // IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
1093 // IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
1094 // IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
1095 // IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
1096 // IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
1097 // IR-GPU-NEXT: store i64 [[NT]], ptr [[NT_ADDR_ASCAST]], align 8
1098 // IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
1099 // IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
1100 // IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
1101 // IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
1102 // IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
1103 // IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
1104 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
1105 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
1106 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
1107 // IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
1108 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
1109 // IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
1110 // IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1111 // IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
1112 // IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
1113 // IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
1114 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
1115 // IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
1116 // IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1117 // IR-GPU: omp.precond.then:
1118 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
1119 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
1120 // IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 4
1121 // IR-GPU-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
1122 // IR-GPU-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
1123 // IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
1124 // IR-GPU-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
1125 // IR-GPU-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
1126 // IR-GPU-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB_ASCAST]], align 4
1127 // IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
1128 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
1129 // IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
1130 // IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
1131 // IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
1132 // IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
1133 // IR-GPU-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4
1134 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1135 // IR-GPU: omp.inner.for.cond:
1136 // IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
1137 // IR-GPU-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64
1138 // IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
1139 // IR-GPU-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP14]]
1140 // IR-GPU-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1141 // IR-GPU: omp.inner.for.body:
1142 // IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
1143 // IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
1144 // IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1145 // IR-GPU-NEXT: store i32 [[ADD]], ptr [[I6_ASCAST]], align 4
1146 // IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4
1147 // IR-GPU-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0
1148 // IR-GPU-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
1149 // IR-GPU: if.then:
1150 // IR-GPU-NEXT: [[CALL:%.*]] = call noundef i32 @_Z17omp_get_num_teamsv() #[[ATTR6:[0-9]+]]
1151 // IR-GPU-NEXT: store i32 [[CALL]], ptr [[NT_ADDR_ASCAST]], align 4
1152 // IR-GPU-NEXT: br label [[IF_END]]
1153 // IR-GPU: if.end:
1154 // IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
1155 // IR-GPU-NEXT: br label [[FOR_COND:%.*]]
1156 // IR-GPU: for.cond:
1157 // IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[J_ASCAST]], align 4
1158 // IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
1159 // IR-GPU-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
1160 // IR-GPU-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
1161 // IR-GPU: for.body:
1162 // IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[J_ASCAST]], align 4
1163 // IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64
1164 // IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
1165 // IR-GPU-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1166 // IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
1167 // IR-GPU-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
1168 // IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4
1169 // IR-GPU-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL10]], [[TMP22]]
1170 // IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[J_ASCAST]], align 4
1171 // IR-GPU-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP23]] to i64
1172 // IR-GPU-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM12]]
1173 // IR-GPU-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX13]], align 4
1174 // IR-GPU-NEXT: br label [[FOR_INC:%.*]]
1175 // IR-GPU: for.inc:
1176 // IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[J_ASCAST]], align 4
1177 // IR-GPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP24]], 1
1178 // IR-GPU-NEXT: store i32 [[INC]], ptr [[J_ASCAST]], align 4
1179 // IR-GPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
1180 // IR-GPU: for.end:
1181 // IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1182 // IR-GPU: omp.body.continue:
1183 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1184 // IR-GPU: omp.inner.for.inc:
1185 // IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
1186 // IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
1187 // IR-GPU-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP26]]
1188 // IR-GPU-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 4
1189 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
1190 // IR-GPU: omp.inner.for.end:
1191 // IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1192 // IR-GPU: omp.loop.exit:
1193 // IR-GPU-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
1194 // IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
1195 // IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP28]])
1196 // IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
1197 // IR-GPU: omp.precond.end:
1198 // IR-GPU-NEXT: ret void
1199 //
1200 //
1201 // IR-LABEL: define {{[^@]+}}@main
1202 // IR-SAME: () #[[ATTR0:[0-9]+]] {
1203 // IR-NEXT: entry:
1204 // IR-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
1205 // IR-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
1206 // IR-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
1207 // IR-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
1208 // IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
1209 // IR-NEXT: [[N_CASTED2:%.*]] = alloca i64, align 8
1210 // IR-NEXT: [[NT:%.*]] = alloca i32, align 4
1211 // IR-NEXT: [[N_CASTED3:%.*]] = alloca i64, align 8
1212 // IR-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
1213 // IR-NEXT: store i32 0, ptr [[RETVAL]], align 4
1214 // IR-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4
1215 // IR-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1216 // IR-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
1217 // IR-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
1218 // IR-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16
1219 // IR-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
1220 // IR-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4
1221 // IR-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1222 // IR-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16
1223 // IR-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
1224 // IR-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4
1225 // IR-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
1226 // IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
1227 // IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]]
1228 // IR-NEXT: [[TMP7:%.*]] = load i32, ptr @N, align 4
1229 // IR-NEXT: store i32 [[TMP7]], ptr [[N_CASTED2]], align 4
1230 // IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[N_CASTED2]], align 8
1231 // IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46(i64 [[TMP8]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]]
1232 // IR-NEXT: store i32 0, ptr [[NT]], align 4
1233 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr @N, align 4
1234 // IR-NEXT: store i32 [[TMP9]], ptr [[N_CASTED3]], align 4
1235 // IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[N_CASTED3]], align 8
1236 // IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[NT]], align 4
1237 // IR-NEXT: store i32 [[TMP11]], ptr [[NT_CASTED]], align 4
1238 // IR-NEXT: [[TMP12:%.*]] = load i64, ptr [[NT_CASTED]], align 8
1239 // IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]]
1240 // IR-NEXT: store i32 0, ptr [[RETVAL]], align 4
1241 // IR-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
1242 // IR-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP13]])
1243 // IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4
1244 // IR-NEXT: ret i32 [[TMP14]]
1245 //
1246 //
1247 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
1248 // IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
1249 // IR-NEXT: entry:
1250 // IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
1251 // IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
1252 // IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
1253 // IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
1254 // IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
1255 // IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
1256 // IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
1257 // IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
1258 // IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
1259 // IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
1260 // IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
1261 // IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
1262 // IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
1263 // IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
1264 // IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
1265 // IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
1266 // IR-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
1267 // IR-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
1268 // IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
1269 // IR-NEXT: ret void
1270 //
1271 //
1272 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined
1273 // IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
1274 // IR-NEXT: entry:
1275 // IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1276 // IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1277 // IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
1278 // IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
1279 // IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
1280 // IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
1281 // IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
1282 // IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1283 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
1284 // IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1285 // IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
1286 // IR-NEXT: [[J:%.*]] = alloca i32, align 4
1287 // IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
1288 // IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
1289 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1290 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1291 // IR-NEXT: [[J5:%.*]] = alloca i32, align 4
1292 // IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
1293 // IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
1294 // IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
1295 // IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
1296 // IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
1297 // IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
1298 // IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
1299 // IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
1300 // IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
1301 // IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
1302 // IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
1303 // IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
1304 // IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
1305 // IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
1306 // IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1307 // IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
1308 // IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1309 // IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
1310 // IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
1311 // IR-NEXT: store i32 0, ptr [[J]], align 4
1312 // IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1313 // IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
1314 // IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1315 // IR: omp.precond.then:
1316 // IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
1317 // IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
1318 // IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
1319 // IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1320 // IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1321 // IR-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1322 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
1323 // IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
1324 // IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1325 // IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
1326 // IR-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
1327 // IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1328 // IR: cond.true:
1329 // IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
1330 // IR-NEXT: br label [[COND_END:%.*]]
1331 // IR: cond.false:
1332 // IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1333 // IR-NEXT: br label [[COND_END]]
1334 // IR: cond.end:
1335 // IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
1336 // IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
1337 // IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
1338 // IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
1339 // IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1340 // IR: omp.inner.for.cond:
1341 // IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1342 // IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1343 // IR-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
1344 // IR-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1345 // IR: omp.inner.for.body:
1346 // IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
1347 // IR-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
1348 // IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1349 // IR-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
1350 // IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
1351 // IR-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
1352 // IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
1353 // IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
1354 // IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1355 // IR: omp.inner.for.inc:
1356 // IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1357 // IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1358 // IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
1359 // IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
1360 // IR-NEXT: br label [[OMP_INNER_FOR_COND]]
1361 // IR: omp.inner.for.end:
1362 // IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1363 // IR: omp.loop.exit:
1364 // IR-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1365 // IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
1366 // IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
1367 // IR-NEXT: br label [[OMP_PRECOND_END]]
1368 // IR: omp.precond.end:
1369 // IR-NEXT: ret void
1370 //
1371 //
1372 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined
1373 // IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
1374 // IR-NEXT: entry:
1375 // IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1376 // IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1377 // IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
1378 // IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
1379 // IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
1380 // IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
1381 // IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
1382 // IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
1383 // IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
1384 // IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1385 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
1386 // IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1387 // IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
1388 // IR-NEXT: [[J:%.*]] = alloca i32, align 4
1389 // IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1390 // IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1391 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1392 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1393 // IR-NEXT: [[J6:%.*]] = alloca i32, align 4
1394 // IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
1395 // IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
1396 // IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
1397 // IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
1398 // IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
1399 // IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
1400 // IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
1401 // IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
1402 // IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
1403 // IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
1404 // IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
1405 // IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
1406 // IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
1407 // IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
1408 // IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
1409 // IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1410 // IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
1411 // IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1412 // IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
1413 // IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
1414 // IR-NEXT: store i32 0, ptr [[J]], align 4
1415 // IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1416 // IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
1417 // IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1418 // IR: omp.precond.then:
1419 // IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1420 // IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
1421 // IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
1422 // IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
1423 // IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
1424 // IR-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
1425 // IR-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
1426 // IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
1427 // IR-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4
1428 // IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1429 // IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1430 // IR-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1431 // IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
1432 // IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
1433 // IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1434 // IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
1435 // IR-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
1436 // IR-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1437 // IR: cond.true:
1438 // IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
1439 // IR-NEXT: br label [[COND_END:%.*]]
1440 // IR: cond.false:
1441 // IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1442 // IR-NEXT: br label [[COND_END]]
1443 // IR: cond.end:
1444 // IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
1445 // IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
1446 // IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
1447 // IR-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
1448 // IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1449 // IR: omp.inner.for.cond:
1450 // IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1451 // IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1452 // IR-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
1453 // IR-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1454 // IR: omp.inner.for.body:
1455 // IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1456 // IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
1457 // IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1458 // IR-NEXT: store i32 [[ADD]], ptr [[J6]], align 4
1459 // IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[J6]], align 4
1460 // IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64
1461 // IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
1462 // IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1463 // IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[J6]], align 4
1464 // IR-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP22]] to i64
1465 // IR-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]]
1466 // IR-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX10]], align 4
1467 // IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1468 // IR: omp.body.continue:
1469 // IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1470 // IR: omp.inner.for.inc:
1471 // IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1472 // IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP23]], 1
1473 // IR-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
1474 // IR-NEXT: br label [[OMP_INNER_FOR_COND]]
1475 // IR: omp.inner.for.end:
1476 // IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1477 // IR: omp.loop.exit:
1478 // IR-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1479 // IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
1480 // IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
1481 // IR-NEXT: br label [[OMP_PRECOND_END]]
1482 // IR: omp.precond.end:
1483 // IR-NEXT: ret void
1484 //
1485 //
1486 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46
1487 // IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
1488 // IR-NEXT: entry:
1489 // IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
1490 // IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
1491 // IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
1492 // IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
1493 // IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
1494 // IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
1495 // IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
1496 // IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
1497 // IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
1498 // IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
1499 // IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
1500 // IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
1501 // IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
1502 // IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
1503 // IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
1504 // IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
1505 // IR-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
1506 // IR-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
1507 // IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
1508 // IR-NEXT: ret void
1509 //
1510 //
1511 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined
1512 // IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
1513 // IR-NEXT: entry:
1514 // IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1515 // IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1516 // IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
1517 // IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
1518 // IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
1519 // IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
1520 // IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
1521 // IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
1522 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
1523 // IR-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
1524 // IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1525 // IR-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
1526 // IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
1527 // IR-NEXT: [[I:%.*]] = alloca i32, align 4
1528 // IR-NEXT: [[J:%.*]] = alloca i32, align 4
1529 // IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
1530 // IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
1531 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
1532 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1533 // IR-NEXT: [[I11:%.*]] = alloca i32, align 4
1534 // IR-NEXT: [[J12:%.*]] = alloca i32, align 4
1535 // IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
1536 // IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
1537 // IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
1538 // IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
1539 // IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
1540 // IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
1541 // IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
1542 // IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
1543 // IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
1544 // IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
1545 // IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
1546 // IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
1547 // IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
1548 // IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
1549 // IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
1550 // IR-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
1551 // IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1552 // IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
1553 // IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1554 // IR-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
1555 // IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
1556 // IR-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
1557 // IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
1558 // IR-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
1559 // IR-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
1560 // IR-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
1561 // IR-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
1562 // IR-NEXT: store i32 0, ptr [[I]], align 4
1563 // IR-NEXT: store i32 0, ptr [[J]], align 4
1564 // IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1565 // IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
1566 // IR-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
1567 // IR: land.lhs.true:
1568 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
1569 // IR-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
1570 // IR-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
1571 // IR: omp.precond.then:
1572 // IR-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
1573 // IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
1574 // IR-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8
1575 // IR-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
1576 // IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1577 // IR-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1578 // IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
1579 // IR-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
1580 // IR-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
1581 // IR-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
1582 // IR-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
1583 // IR-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1584 // IR: cond.true:
1585 // IR-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
1586 // IR-NEXT: br label [[COND_END:%.*]]
1587 // IR: cond.false:
1588 // IR-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
1589 // IR-NEXT: br label [[COND_END]]
1590 // IR: cond.end:
1591 // IR-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
1592 // IR-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
1593 // IR-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
1594 // IR-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8
1595 // IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1596 // IR: omp.inner.for.cond:
1597 // IR-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
1598 // IR-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
1599 // IR-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]]
1600 // IR-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1601 // IR: omp.inner.for.body:
1602 // IR-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
1603 // IR-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
1604 // IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
1605 // IR-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4
1606 // IR-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8
1607 // IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
1608 // IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1609 // IR: omp.inner.for.inc:
1610 // IR-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
1611 // IR-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
1612 // IR-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]]
1613 // IR-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
1614 // IR-NEXT: br label [[OMP_INNER_FOR_COND]]
1615 // IR: omp.inner.for.end:
1616 // IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1617 // IR: omp.loop.exit:
1618 // IR-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1619 // IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
1620 // IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]])
1621 // IR-NEXT: br label [[OMP_PRECOND_END]]
1622 // IR: omp.precond.end:
1623 // IR-NEXT: ret void
1624 //
1625 //
1626 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined
1627 // IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
1628 // IR-NEXT: entry:
1629 // IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1630 // IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1631 // IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
1632 // IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
1633 // IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
1634 // IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
1635 // IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
1636 // IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
1637 // IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
1638 // IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
1639 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
1640 // IR-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
1641 // IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1642 // IR-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
1643 // IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
1644 // IR-NEXT: [[I:%.*]] = alloca i32, align 4
1645 // IR-NEXT: [[J:%.*]] = alloca i32, align 4
1646 // IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
1647 // IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
1648 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
1649 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1650 // IR-NEXT: [[I11:%.*]] = alloca i32, align 4
1651 // IR-NEXT: [[J12:%.*]] = alloca i32, align 4
1652 // IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
1653 // IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
1654 // IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
1655 // IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
1656 // IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
1657 // IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
1658 // IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
1659 // IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
1660 // IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
1661 // IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
1662 // IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
1663 // IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
1664 // IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
1665 // IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
1666 // IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
1667 // IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
1668 // IR-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
1669 // IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1670 // IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
1671 // IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1672 // IR-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
1673 // IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
1674 // IR-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
1675 // IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
1676 // IR-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
1677 // IR-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
1678 // IR-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
1679 // IR-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
1680 // IR-NEXT: store i32 0, ptr [[I]], align 4
1681 // IR-NEXT: store i32 0, ptr [[J]], align 4
1682 // IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1683 // IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
1684 // IR-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
1685 // IR: land.lhs.true:
1686 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
1687 // IR-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
1688 // IR-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
1689 // IR: omp.precond.then:
1690 // IR-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
1691 // IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
1692 // IR-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8
1693 // IR-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
1694 // IR-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
1695 // IR-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8
1696 // IR-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8
1697 // IR-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
1698 // IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1699 // IR-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1700 // IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
1701 // IR-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
1702 // IR-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
1703 // IR-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
1704 // IR-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
1705 // IR-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1706 // IR: cond.true:
1707 // IR-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
1708 // IR-NEXT: br label [[COND_END:%.*]]
1709 // IR: cond.false:
1710 // IR-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
1711 // IR-NEXT: br label [[COND_END]]
1712 // IR: cond.end:
1713 // IR-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
1714 // IR-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
1715 // IR-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
1716 // IR-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8
1717 // IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1718 // IR: omp.inner.for.cond:
1719 // IR-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
1720 // IR-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
1721 // IR-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]]
1722 // IR-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1723 // IR: omp.inner.for.body:
1724 // IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
1725 // IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
1726 // IR-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0
1727 // IR-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
1728 // IR-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
1729 // IR-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
1730 // IR-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]]
1731 // IR-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
1732 // IR-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
1733 // IR-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
1734 // IR-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
1735 // IR-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
1736 // IR-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
1737 // IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
1738 // IR-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0
1739 // IR-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
1740 // IR-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
1741 // IR-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
1742 // IR-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]]
1743 // IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
1744 // IR-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0
1745 // IR-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
1746 // IR-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
1747 // IR-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
1748 // IR-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
1749 // IR-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]]
1750 // IR-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
1751 // IR-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
1752 // IR-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
1753 // IR-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
1754 // IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4
1755 // IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64
1756 // IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
1757 // IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1758 // IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4
1759 // IR-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]]
1760 // IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
1761 // IR-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[TMP31]]
1762 // IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4
1763 // IR-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64
1764 // IR-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]]
1765 // IR-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4
1766 // IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1767 // IR: omp.body.continue:
1768 // IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1769 // IR: omp.inner.for.inc:
1770 // IR-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
1771 // IR-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1
1772 // IR-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
1773 // IR-NEXT: br label [[OMP_INNER_FOR_COND]]
1774 // IR: omp.inner.for.end:
1775 // IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1776 // IR: omp.loop.exit:
1777 // IR-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1778 // IR-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
1779 // IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP35]])
1780 // IR-NEXT: br label [[OMP_PRECOND_END]]
1781 // IR: omp.precond.end:
1782 // IR-NEXT: ret void
1783 //
1784 //
1785 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55
1786 // IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
1787 // IR-NEXT: entry:
1788 // IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
1789 // IR-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
1790 // IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
1791 // IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
1792 // IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
1793 // IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
1794 // IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
1795 // IR-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
1796 // IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
1797 // IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
1798 // IR-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
1799 // IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
1800 // IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
1801 // IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
1802 // IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
1803 // IR-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
1804 // IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
1805 // IR-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
1806 // IR-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8
1807 // IR-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 32, i32 0)
1808 // IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
1809 // IR-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
1810 // IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
1811 // IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[NT_ADDR]], align 4
1812 // IR-NEXT: store i32 [[TMP7]], ptr [[NT_CASTED]], align 4
1813 // IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[NT_CASTED]], align 8
1814 // IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined, i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP3]], ptr [[TMP4]])
1815 // IR-NEXT: ret void
1816 //
1817 //
1818 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined
1819 // IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
1820 // IR-NEXT: entry:
1821 // IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1822 // IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1823 // IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
1824 // IR-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
1825 // IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
1826 // IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
1827 // IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
1828 // IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
1829 // IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1830 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
1831 // IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1832 // IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
1833 // IR-NEXT: [[I:%.*]] = alloca i32, align 4
1834 // IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
1835 // IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
1836 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1837 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1838 // IR-NEXT: [[I5:%.*]] = alloca i32, align 4
1839 // IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
1840 // IR-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
1841 // IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
1842 // IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
1843 // IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
1844 // IR-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
1845 // IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
1846 // IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
1847 // IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
1848 // IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
1849 // IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
1850 // IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
1851 // IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
1852 // IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
1853 // IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
1854 // IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
1855 // IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1856 // IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
1857 // IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1858 // IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
1859 // IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
1860 // IR-NEXT: store i32 0, ptr [[I]], align 4
1861 // IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1862 // IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
1863 // IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1864 // IR: omp.precond.then:
1865 // IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
1866 // IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
1867 // IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
1868 // IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1869 // IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1870 // IR-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1871 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
1872 // IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
1873 // IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1874 // IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
1875 // IR-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
1876 // IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1877 // IR: cond.true:
1878 // IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
1879 // IR-NEXT: br label [[COND_END:%.*]]
1880 // IR: cond.false:
1881 // IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1882 // IR-NEXT: br label [[COND_END]]
1883 // IR: cond.end:
1884 // IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
1885 // IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
1886 // IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
1887 // IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
1888 // IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1889 // IR: omp.inner.for.cond:
1890 // IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1891 // IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1892 // IR-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
1893 // IR-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1894 // IR: omp.inner.for.body:
1895 // IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
1896 // IR-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
1897 // IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
1898 // IR-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
1899 // IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
1900 // IR-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
1901 // IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
1902 // IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[NT_ADDR]], align 4
1903 // IR-NEXT: store i32 [[TMP23]], ptr [[NT_CASTED]], align 4
1904 // IR-NEXT: [[TMP24:%.*]] = load i64, ptr [[NT_CASTED]], align 8
1905 // IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 8, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
1906 // IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1907 // IR: omp.inner.for.inc:
1908 // IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
1909 // IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
1910 // IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]]
1911 // IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
1912 // IR-NEXT: br label [[OMP_INNER_FOR_COND]]
1913 // IR: omp.inner.for.end:
1914 // IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1915 // IR: omp.loop.exit:
1916 // IR-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1917 // IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
1918 // IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]])
1919 // IR-NEXT: br label [[OMP_PRECOND_END]]
1920 // IR: omp.precond.end:
1921 // IR-NEXT: ret void
1922 //
1923 //
1924 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined
1925 // IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
1926 // IR-NEXT: entry:
1927 // IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1928 // IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1929 // IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
1930 // IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
1931 // IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
1932 // IR-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
1933 // IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
1934 // IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
1935 // IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
1936 // IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
1937 // IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1938 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
1939 // IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1940 // IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
1941 // IR-NEXT: [[I:%.*]] = alloca i32, align 4
1942 // IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1943 // IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1944 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1945 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1946 // IR-NEXT: [[I6:%.*]] = alloca i32, align 4
1947 // IR-NEXT: [[J:%.*]] = alloca i32, align 4
1948 // IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
1949 // IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
1950 // IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
1951 // IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
1952 // IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
1953 // IR-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
1954 // IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
1955 // IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
1956 // IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
1957 // IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
1958 // IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
1959 // IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
1960 // IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
1961 // IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
1962 // IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
1963 // IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
1964 // IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1965 // IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
1966 // IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1967 // IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
1968 // IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
1969 // IR-NEXT: store i32 0, ptr [[I]], align 4
1970 // IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
1971 // IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
1972 // IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1973 // IR: omp.precond.then:
1974 // IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
1975 // IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
1976 // IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
1977 // IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
1978 // IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
1979 // IR-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
1980 // IR-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
1981 // IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
1982 // IR-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4
1983 // IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
1984 // IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
1985 // IR-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
1986 // IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
1987 // IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
1988 // IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1989 // IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
1990 // IR-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
1991 // IR-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1992 // IR: cond.true:
1993 // IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
1994 // IR-NEXT: br label [[COND_END:%.*]]
1995 // IR: cond.false:
1996 // IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
1997 // IR-NEXT: br label [[COND_END]]
1998 // IR: cond.end:
1999 // IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
2000 // IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
2001 // IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2002 // IR-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
2003 // IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2004 // IR: omp.inner.for.cond:
2005 // IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2006 // IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2007 // IR-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
2008 // IR-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2009 // IR: omp.inner.for.body:
2010 // IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2011 // IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
2012 // IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2013 // IR-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
2014 // IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[NT_ADDR]], align 4
2015 // IR-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0
2016 // IR-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
2017 // IR: if.then:
2018 // IR-NEXT: [[CALL:%.*]] = call noundef i32 @_Z17omp_get_num_teamsv()
2019 // IR-NEXT: store i32 [[CALL]], ptr [[NT_ADDR]], align 4
2020 // IR-NEXT: br label [[IF_END]]
2021 // IR: if.end:
2022 // IR-NEXT: store i32 0, ptr [[J]], align 4
2023 // IR-NEXT: br label [[FOR_COND:%.*]]
2024 // IR: for.cond:
2025 // IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[J]], align 4
2026 // IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
2027 // IR-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP21]], [[TMP22]]
2028 // IR-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
2029 // IR: for.body:
2030 // IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4
2031 // IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64
2032 // IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
2033 // IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
2034 // IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ADDR]], align 4
2035 // IR-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP24]], [[TMP25]]
2036 // IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[NT_ADDR]], align 4
2037 // IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL10]], [[TMP26]]
2038 // IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[J]], align 4
2039 // IR-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64
2040 // IR-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM12]]
2041 // IR-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX13]], align 4
2042 // IR-NEXT: br label [[FOR_INC:%.*]]
2043 // IR: for.inc:
2044 // IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[J]], align 4
2045 // IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP28]], 1
2046 // IR-NEXT: store i32 [[INC]], ptr [[J]], align 4
2047 // IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
2048 // IR: for.end:
2049 // IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2050 // IR: omp.body.continue:
2051 // IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2052 // IR: omp.inner.for.inc:
2053 // IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2054 // IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1
2055 // IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4
2056 // IR-NEXT: br label [[OMP_INNER_FOR_COND]]
2057 // IR: omp.inner.for.end:
2058 // IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
2059 // IR: omp.loop.exit:
2060 // IR-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2061 // IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
2062 // IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP31]])
2063 // IR-NEXT: br label [[OMP_PRECOND_END]]
2064 // IR: omp.precond.end:
2065 // IR-NEXT: ret void
2066 //
2067 //
2068 // IR-PCH-LABEL: define {{[^@]+}}@main
2069 // IR-PCH-SAME: () #[[ATTR0:[0-9]+]] {
2070 // IR-PCH-NEXT: entry:
2071 // IR-PCH-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
2072 // IR-PCH-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
2073 // IR-PCH-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
2074 // IR-PCH-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
2075 // IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
2076 // IR-PCH-NEXT: [[N_CASTED2:%.*]] = alloca i64, align 8
2077 // IR-PCH-NEXT: [[NT:%.*]] = alloca i32, align 4
2078 // IR-PCH-NEXT: [[N_CASTED3:%.*]] = alloca i64, align 8
2079 // IR-PCH-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
2080 // IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4
2081 // IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4
2082 // IR-PCH-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
2083 // IR-PCH-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
2084 // IR-PCH-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
2085 // IR-PCH-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16
2086 // IR-PCH-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
2087 // IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4
2088 // IR-PCH-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
2089 // IR-PCH-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16
2090 // IR-PCH-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
2091 // IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4
2092 // IR-PCH-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
2093 // IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
2094 // IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]]
2095 // IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr @N, align 4
2096 // IR-PCH-NEXT: store i32 [[TMP7]], ptr [[N_CASTED2]], align 4
2097 // IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[N_CASTED2]], align 8
2098 // IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46(i64 [[TMP8]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]]
2099 // IR-PCH-NEXT: store i32 0, ptr [[NT]], align 4
2100 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr @N, align 4
2101 // IR-PCH-NEXT: store i32 [[TMP9]], ptr [[N_CASTED3]], align 4
2102 // IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[N_CASTED3]], align 8
2103 // IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[NT]], align 4
2104 // IR-PCH-NEXT: store i32 [[TMP11]], ptr [[NT_CASTED]], align 4
2105 // IR-PCH-NEXT: [[TMP12:%.*]] = load i64, ptr [[NT_CASTED]], align 8
2106 // IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]]
2107 // IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4
2108 // IR-PCH-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
2109 // IR-PCH-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP13]])
2110 // IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4
2111 // IR-PCH-NEXT: ret i32 [[TMP14]]
2112 //
2113 //
2114 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
2115 // IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
2116 // IR-PCH-NEXT: entry:
2117 // IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
2118 // IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
2119 // IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
2120 // IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
2121 // IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
2122 // IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
2123 // IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
2124 // IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
2125 // IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
2126 // IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
2127 // IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
2128 // IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
2129 // IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
2130 // IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
2131 // IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
2132 // IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
2133 // IR-PCH-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
2134 // IR-PCH-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
2135 // IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
2136 // IR-PCH-NEXT: ret void
2137 //
2138 //
2139 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined
2140 // IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
2141 // IR-PCH-NEXT: entry:
2142 // IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2143 // IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2144 // IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
2145 // IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
2146 // IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
2147 // IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
2148 // IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
2149 // IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2150 // IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
2151 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
2152 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
2153 // IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
2154 // IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
2155 // IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
2156 // IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2157 // IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2158 // IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4
2159 // IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
2160 // IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
2161 // IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
2162 // IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
2163 // IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
2164 // IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
2165 // IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
2166 // IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
2167 // IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
2168 // IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
2169 // IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
2170 // IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
2171 // IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
2172 // IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
2173 // IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2174 // IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
2175 // IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
2176 // IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
2177 // IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
2178 // IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
2179 // IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2180 // IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
2181 // IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
2182 // IR-PCH: omp.precond.then:
2183 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
2184 // IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
2185 // IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
2186 // IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
2187 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2188 // IR-PCH-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2189 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
2190 // IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
2191 // IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
2192 // IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
2193 // IR-PCH-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
2194 // IR-PCH-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2195 // IR-PCH: cond.true:
2196 // IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
2197 // IR-PCH-NEXT: br label [[COND_END:%.*]]
2198 // IR-PCH: cond.false:
2199 // IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
2200 // IR-PCH-NEXT: br label [[COND_END]]
2201 // IR-PCH: cond.end:
2202 // IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
2203 // IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
2204 // IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
2205 // IR-PCH-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
2206 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2207 // IR-PCH: omp.inner.for.cond:
2208 // IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2209 // IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
2210 // IR-PCH-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
2211 // IR-PCH-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2212 // IR-PCH: omp.inner.for.body:
2213 // IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
2214 // IR-PCH-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
2215 // IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
2216 // IR-PCH-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
2217 // IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
2218 // IR-PCH-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
2219 // IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
2220 // IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
2221 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2222 // IR-PCH: omp.inner.for.inc:
2223 // IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2224 // IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2225 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
2226 // IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
2227 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
2228 // IR-PCH: omp.inner.for.end:
2229 // IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
2230 // IR-PCH: omp.loop.exit:
2231 // IR-PCH-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2232 // IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
2233 // IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
2234 // IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
2235 // IR-PCH: omp.precond.end:
2236 // IR-PCH-NEXT: ret void
2237 //
2238 //
2239 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined
2240 // IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
2241 // IR-PCH-NEXT: entry:
2242 // IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2243 // IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2244 // IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
2245 // IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
2246 // IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
2247 // IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
2248 // IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
2249 // IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
2250 // IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
2251 // IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2252 // IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
2253 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
2254 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
2255 // IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
2256 // IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
2257 // IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
2258 // IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2259 // IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2260 // IR-PCH-NEXT: [[J6:%.*]] = alloca i32, align 4
2261 // IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
2262 // IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
2263 // IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
2264 // IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
2265 // IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
2266 // IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
2267 // IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
2268 // IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
2269 // IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
2270 // IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
2271 // IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
2272 // IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
2273 // IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
2274 // IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
2275 // IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
2276 // IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2277 // IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
2278 // IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
2279 // IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
2280 // IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
2281 // IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
2282 // IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2283 // IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
2284 // IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
2285 // IR-PCH: omp.precond.then:
2286 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
2287 // IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
2288 // IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
2289 // IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
2290 // IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
2291 // IR-PCH-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
2292 // IR-PCH-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
2293 // IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
2294 // IR-PCH-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4
2295 // IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
2296 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2297 // IR-PCH-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2298 // IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
2299 // IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
2300 // IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2301 // IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
2302 // IR-PCH-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
2303 // IR-PCH-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2304 // IR-PCH: cond.true:
2305 // IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
2306 // IR-PCH-NEXT: br label [[COND_END:%.*]]
2307 // IR-PCH: cond.false:
2308 // IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2309 // IR-PCH-NEXT: br label [[COND_END]]
2310 // IR-PCH: cond.end:
2311 // IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
2312 // IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
2313 // IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2314 // IR-PCH-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
2315 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2316 // IR-PCH: omp.inner.for.cond:
2317 // IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2318 // IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2319 // IR-PCH-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
2320 // IR-PCH-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2321 // IR-PCH: omp.inner.for.body:
2322 // IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2323 // IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
2324 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2325 // IR-PCH-NEXT: store i32 [[ADD]], ptr [[J6]], align 4
2326 // IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[J6]], align 4
2327 // IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64
2328 // IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
2329 // IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
2330 // IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[J6]], align 4
2331 // IR-PCH-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP22]] to i64
2332 // IR-PCH-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]]
2333 // IR-PCH-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX10]], align 4
2334 // IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2335 // IR-PCH: omp.body.continue:
2336 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2337 // IR-PCH: omp.inner.for.inc:
2338 // IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2339 // IR-PCH-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP23]], 1
2340 // IR-PCH-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
2341 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
2342 // IR-PCH: omp.inner.for.end:
2343 // IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
2344 // IR-PCH: omp.loop.exit:
2345 // IR-PCH-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2346 // IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
2347 // IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
2348 // IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
2349 // IR-PCH: omp.precond.end:
2350 // IR-PCH-NEXT: ret void
2351 //
2352 //
2353 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46
2354 // IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
2355 // IR-PCH-NEXT: entry:
2356 // IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
2357 // IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
2358 // IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
2359 // IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
2360 // IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
2361 // IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
2362 // IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
2363 // IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
2364 // IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
2365 // IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
2366 // IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
2367 // IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
2368 // IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
2369 // IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
2370 // IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
2371 // IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
2372 // IR-PCH-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
2373 // IR-PCH-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
2374 // IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
2375 // IR-PCH-NEXT: ret void
2376 //
2377 //
2378 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined
2379 // IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
2380 // IR-PCH-NEXT: entry:
2381 // IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2382 // IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2383 // IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
2384 // IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
2385 // IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
2386 // IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
2387 // IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
2388 // IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
2389 // IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
2390 // IR-PCH-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
2391 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
2392 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
2393 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
2394 // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
2395 // IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
2396 // IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
2397 // IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
2398 // IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
2399 // IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2400 // IR-PCH-NEXT: [[I11:%.*]] = alloca i32, align 4
2401 // IR-PCH-NEXT: [[J12:%.*]] = alloca i32, align 4
2402 // IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
2403 // IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
2404 // IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
2405 // IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
2406 // IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
2407 // IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
2408 // IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
2409 // IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
2410 // IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
2411 // IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
2412 // IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
2413 // IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
2414 // IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
2415 // IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
2416 // IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
2417 // IR-PCH-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
2418 // IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2419 // IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
2420 // IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
2421 // IR-PCH-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
2422 // IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
2423 // IR-PCH-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
2424 // IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
2425 // IR-PCH-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
2426 // IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
2427 // IR-PCH-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
2428 // IR-PCH-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
2429 // IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
2430 // IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
2431 // IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2432 // IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
2433 // IR-PCH-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
2434 // IR-PCH: land.lhs.true:
2435 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
2436 // IR-PCH-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
2437 // IR-PCH-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
2438 // IR-PCH: omp.precond.then:
2439 // IR-PCH-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
2440 // IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
2441 // IR-PCH-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8
2442 // IR-PCH-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
2443 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2444 // IR-PCH-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2445 // IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
2446 // IR-PCH-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
2447 // IR-PCH-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
2448 // IR-PCH-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
2449 // IR-PCH-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
2450 // IR-PCH-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2451 // IR-PCH: cond.true:
2452 // IR-PCH-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
2453 // IR-PCH-NEXT: br label [[COND_END:%.*]]
2454 // IR-PCH: cond.false:
2455 // IR-PCH-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
2456 // IR-PCH-NEXT: br label [[COND_END]]
2457 // IR-PCH: cond.end:
2458 // IR-PCH-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
2459 // IR-PCH-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
2460 // IR-PCH-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
2461 // IR-PCH-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8
2462 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2463 // IR-PCH: omp.inner.for.cond:
2464 // IR-PCH-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
2465 // IR-PCH-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
2466 // IR-PCH-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]]
2467 // IR-PCH-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2468 // IR-PCH: omp.inner.for.body:
2469 // IR-PCH-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
2470 // IR-PCH-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
2471 // IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
2472 // IR-PCH-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4
2473 // IR-PCH-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8
2474 // IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
2475 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2476 // IR-PCH: omp.inner.for.inc:
2477 // IR-PCH-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
2478 // IR-PCH-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
2479 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]]
2480 // IR-PCH-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
2481 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
2482 // IR-PCH: omp.inner.for.end:
2483 // IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
2484 // IR-PCH: omp.loop.exit:
2485 // IR-PCH-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2486 // IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
2487 // IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]])
2488 // IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
2489 // IR-PCH: omp.precond.end:
2490 // IR-PCH-NEXT: ret void
2491 //
2492 //
2493 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined
2494 // IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
2495 // IR-PCH-NEXT: entry:
2496 // IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2497 // IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2498 // IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
2499 // IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
2500 // IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
2501 // IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
2502 // IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
2503 // IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
2504 // IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
2505 // IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
2506 // IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
2507 // IR-PCH-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
2508 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
2509 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
2510 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
2511 // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
2512 // IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
2513 // IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
2514 // IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
2515 // IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
2516 // IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2517 // IR-PCH-NEXT: [[I11:%.*]] = alloca i32, align 4
2518 // IR-PCH-NEXT: [[J12:%.*]] = alloca i32, align 4
2519 // IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
2520 // IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
2521 // IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
2522 // IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
2523 // IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
2524 // IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
2525 // IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
2526 // IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
2527 // IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
2528 // IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
2529 // IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
2530 // IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
2531 // IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
2532 // IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
2533 // IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
2534 // IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
2535 // IR-PCH-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
2536 // IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2537 // IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
2538 // IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
2539 // IR-PCH-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
2540 // IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
2541 // IR-PCH-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
2542 // IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
2543 // IR-PCH-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
2544 // IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
2545 // IR-PCH-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
2546 // IR-PCH-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
2547 // IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
2548 // IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
2549 // IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2550 // IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
2551 // IR-PCH-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
2552 // IR-PCH: land.lhs.true:
2553 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
2554 // IR-PCH-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
2555 // IR-PCH-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
2556 // IR-PCH: omp.precond.then:
2557 // IR-PCH-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
2558 // IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
2559 // IR-PCH-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8
2560 // IR-PCH-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
2561 // IR-PCH-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
2562 // IR-PCH-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8
2563 // IR-PCH-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8
2564 // IR-PCH-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
2565 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2566 // IR-PCH-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2567 // IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
2568 // IR-PCH-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
2569 // IR-PCH-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
2570 // IR-PCH-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
2571 // IR-PCH-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
2572 // IR-PCH-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2573 // IR-PCH: cond.true:
2574 // IR-PCH-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
2575 // IR-PCH-NEXT: br label [[COND_END:%.*]]
2576 // IR-PCH: cond.false:
2577 // IR-PCH-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
2578 // IR-PCH-NEXT: br label [[COND_END]]
2579 // IR-PCH: cond.end:
2580 // IR-PCH-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
2581 // IR-PCH-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
2582 // IR-PCH-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
2583 // IR-PCH-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8
2584 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2585 // IR-PCH: omp.inner.for.cond:
2586 // IR-PCH-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
2587 // IR-PCH-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
2588 // IR-PCH-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]]
2589 // IR-PCH-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2590 // IR-PCH: omp.inner.for.body:
2591 // IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
2592 // IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
2593 // IR-PCH-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0
2594 // IR-PCH-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
2595 // IR-PCH-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
2596 // IR-PCH-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
2597 // IR-PCH-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]]
2598 // IR-PCH-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
2599 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
2600 // IR-PCH-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
2601 // IR-PCH-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
2602 // IR-PCH-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
2603 // IR-PCH-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
2604 // IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
2605 // IR-PCH-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0
2606 // IR-PCH-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
2607 // IR-PCH-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
2608 // IR-PCH-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
2609 // IR-PCH-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]]
2610 // IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
2611 // IR-PCH-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0
2612 // IR-PCH-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
2613 // IR-PCH-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
2614 // IR-PCH-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
2615 // IR-PCH-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
2616 // IR-PCH-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]]
2617 // IR-PCH-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
2618 // IR-PCH-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
2619 // IR-PCH-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
2620 // IR-PCH-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
2621 // IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4
2622 // IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64
2623 // IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
2624 // IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
2625 // IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4
2626 // IR-PCH-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]]
2627 // IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
2628 // IR-PCH-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[TMP31]]
2629 // IR-PCH-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4
2630 // IR-PCH-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64
2631 // IR-PCH-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]]
2632 // IR-PCH-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4
2633 // IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2634 // IR-PCH: omp.body.continue:
2635 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2636 // IR-PCH: omp.inner.for.inc:
2637 // IR-PCH-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
2638 // IR-PCH-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1
2639 // IR-PCH-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
2640 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
2641 // IR-PCH: omp.inner.for.end:
2642 // IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
2643 // IR-PCH: omp.loop.exit:
2644 // IR-PCH-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2645 // IR-PCH-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
2646 // IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP35]])
2647 // IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
2648 // IR-PCH: omp.precond.end:
2649 // IR-PCH-NEXT: ret void
2650 //
2651 //
2652 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55
2653 // IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
2654 // IR-PCH-NEXT: entry:
2655 // IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
2656 // IR-PCH-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
2657 // IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
2658 // IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
2659 // IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
2660 // IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
2661 // IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
2662 // IR-PCH-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
2663 // IR-PCH-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
2664 // IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
2665 // IR-PCH-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
2666 // IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
2667 // IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
2668 // IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
2669 // IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
2670 // IR-PCH-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
2671 // IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
2672 // IR-PCH-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
2673 // IR-PCH-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8
2674 // IR-PCH-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 32, i32 0)
2675 // IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
2676 // IR-PCH-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
2677 // IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
2678 // IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[NT_ADDR]], align 4
2679 // IR-PCH-NEXT: store i32 [[TMP7]], ptr [[NT_CASTED]], align 4
2680 // IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[NT_CASTED]], align 8
2681 // IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined, i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP3]], ptr [[TMP4]])
2682 // IR-PCH-NEXT: ret void
2683 //
2684 //
2685 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined
2686 // IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
2687 // IR-PCH-NEXT: entry:
2688 // IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2689 // IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2690 // IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
2691 // IR-PCH-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
2692 // IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
2693 // IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
2694 // IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
2695 // IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
2696 // IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2697 // IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
2698 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
2699 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
2700 // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
2701 // IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
2702 // IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
2703 // IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2704 // IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2705 // IR-PCH-NEXT: [[I5:%.*]] = alloca i32, align 4
2706 // IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
2707 // IR-PCH-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
2708 // IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
2709 // IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
2710 // IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
2711 // IR-PCH-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
2712 // IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
2713 // IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
2714 // IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
2715 // IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
2716 // IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
2717 // IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
2718 // IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
2719 // IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
2720 // IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
2721 // IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
2722 // IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2723 // IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
2724 // IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
2725 // IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
2726 // IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
2727 // IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
2728 // IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2729 // IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
2730 // IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
2731 // IR-PCH: omp.precond.then:
2732 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
2733 // IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
2734 // IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
2735 // IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
2736 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2737 // IR-PCH-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2738 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
2739 // IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
2740 // IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
2741 // IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
2742 // IR-PCH-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
2743 // IR-PCH-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2744 // IR-PCH: cond.true:
2745 // IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
2746 // IR-PCH-NEXT: br label [[COND_END:%.*]]
2747 // IR-PCH: cond.false:
2748 // IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
2749 // IR-PCH-NEXT: br label [[COND_END]]
2750 // IR-PCH: cond.end:
2751 // IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
2752 // IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
2753 // IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
2754 // IR-PCH-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
2755 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2756 // IR-PCH: omp.inner.for.cond:
2757 // IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2758 // IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
2759 // IR-PCH-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
2760 // IR-PCH-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2761 // IR-PCH: omp.inner.for.body:
2762 // IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
2763 // IR-PCH-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
2764 // IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
2765 // IR-PCH-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
2766 // IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
2767 // IR-PCH-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
2768 // IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
2769 // IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[NT_ADDR]], align 4
2770 // IR-PCH-NEXT: store i32 [[TMP23]], ptr [[NT_CASTED]], align 4
2771 // IR-PCH-NEXT: [[TMP24:%.*]] = load i64, ptr [[NT_CASTED]], align 8
2772 // IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 8, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
2773 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2774 // IR-PCH: omp.inner.for.inc:
2775 // IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2776 // IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
2777 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]]
2778 // IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
2779 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
2780 // IR-PCH: omp.inner.for.end:
2781 // IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
2782 // IR-PCH: omp.loop.exit:
2783 // IR-PCH-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2784 // IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
2785 // IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]])
2786 // IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
2787 // IR-PCH: omp.precond.end:
2788 // IR-PCH-NEXT: ret void
2789 //
2790 //
2791 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined
2792 // IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
2793 // IR-PCH-NEXT: entry:
2794 // IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2795 // IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2796 // IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
2797 // IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
2798 // IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
2799 // IR-PCH-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
2800 // IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
2801 // IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
2802 // IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
2803 // IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
2804 // IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2805 // IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
2806 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
2807 // IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
2808 // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
2809 // IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
2810 // IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
2811 // IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2812 // IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2813 // IR-PCH-NEXT: [[I6:%.*]] = alloca i32, align 4
2814 // IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
2815 // IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
2816 // IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
2817 // IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
2818 // IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
2819 // IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
2820 // IR-PCH-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
2821 // IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
2822 // IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
2823 // IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
2824 // IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
2825 // IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
2826 // IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
2827 // IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
2828 // IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
2829 // IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
2830 // IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
2831 // IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2832 // IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
2833 // IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
2834 // IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
2835 // IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
2836 // IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
2837 // IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
2838 // IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
2839 // IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
2840 // IR-PCH: omp.precond.then:
2841 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
2842 // IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
2843 // IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
2844 // IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
2845 // IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
2846 // IR-PCH-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
2847 // IR-PCH-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
2848 // IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
2849 // IR-PCH-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4
2850 // IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
2851 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
2852 // IR-PCH-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2853 // IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
2854 // IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
2855 // IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2856 // IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
2857 // IR-PCH-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
2858 // IR-PCH-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2859 // IR-PCH: cond.true:
2860 // IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
2861 // IR-PCH-NEXT: br label [[COND_END:%.*]]
2862 // IR-PCH: cond.false:
2863 // IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2864 // IR-PCH-NEXT: br label [[COND_END]]
2865 // IR-PCH: cond.end:
2866 // IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
2867 // IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
2868 // IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
2869 // IR-PCH-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
2870 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2871 // IR-PCH: omp.inner.for.cond:
2872 // IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2873 // IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
2874 // IR-PCH-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
2875 // IR-PCH-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2876 // IR-PCH: omp.inner.for.body:
2877 // IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2878 // IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
2879 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2880 // IR-PCH-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
2881 // IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[NT_ADDR]], align 4
2882 // IR-PCH-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0
2883 // IR-PCH-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
2884 // IR-PCH: if.then:
2885 // IR-PCH-NEXT: [[CALL:%.*]] = call noundef i32 @_Z17omp_get_num_teamsv()
2886 // IR-PCH-NEXT: store i32 [[CALL]], ptr [[NT_ADDR]], align 4
2887 // IR-PCH-NEXT: br label [[IF_END]]
2888 // IR-PCH: if.end:
2889 // IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
2890 // IR-PCH-NEXT: br label [[FOR_COND:%.*]]
2891 // IR-PCH: for.cond:
2892 // IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[J]], align 4
2893 // IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
2894 // IR-PCH-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP21]], [[TMP22]]
2895 // IR-PCH-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
2896 // IR-PCH: for.body:
2897 // IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4
2898 // IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64
2899 // IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
2900 // IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
2901 // IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ADDR]], align 4
2902 // IR-PCH-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP24]], [[TMP25]]
2903 // IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[NT_ADDR]], align 4
2904 // IR-PCH-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL10]], [[TMP26]]
2905 // IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[J]], align 4
2906 // IR-PCH-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64
2907 // IR-PCH-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM12]]
2908 // IR-PCH-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX13]], align 4
2909 // IR-PCH-NEXT: br label [[FOR_INC:%.*]]
2910 // IR-PCH: for.inc:
2911 // IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[J]], align 4
2912 // IR-PCH-NEXT: [[INC:%.*]] = add nsw i32 [[TMP28]], 1
2913 // IR-PCH-NEXT: store i32 [[INC]], ptr [[J]], align 4
2914 // IR-PCH-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
2915 // IR-PCH: for.end:
2916 // IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2917 // IR-PCH: omp.body.continue:
2918 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2919 // IR-PCH: omp.inner.for.inc:
2920 // IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
2921 // IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1
2922 // IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4
2923 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
2924 // IR-PCH: omp.inner.for.end:
2925 // IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
2926 // IR-PCH: omp.loop.exit:
2927 // IR-PCH-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2928 // IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
2929 // IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP31]])
2930 // IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
2931 // IR-PCH: omp.precond.end:
2932 // IR-PCH-NEXT: ret void
2933 //
2934 //
2935 // IR-GPU-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
2936 // IR-GPU-NESTED-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
2937 // IR-GPU-NESTED-NEXT: entry:
2938 // IR-GPU-NESTED-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2939 // IR-GPU-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
2940 // IR-GPU-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
2941 // IR-GPU-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2942 // IR-GPU-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
2943 // IR-GPU-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2944 // IR-GPU-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
2945 // IR-GPU-NESTED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
2946 // IR-GPU-NESTED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
2947 // IR-GPU-NESTED-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
2948 // IR-GPU-NESTED-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
2949 // IR-GPU-NESTED-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
2950 // IR-GPU-NESTED-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
2951 // IR-GPU-NESTED-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
2952 // IR-GPU-NESTED-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
2953 // IR-GPU-NESTED-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
2954 // IR-GPU-NESTED-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
2955 // IR-GPU-NESTED-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
2956 // IR-GPU-NESTED-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
2957 // IR-GPU-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
2958 // IR-GPU-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
2959 // IR-GPU-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
2960 // IR-GPU-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
2961 // IR-GPU-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
2962 // IR-GPU-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
2963 // IR-GPU-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
2964 // IR-GPU-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
2965 // IR-GPU-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
2966 // IR-GPU-NESTED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment to ptr), ptr [[DYN_PTR]])
2967 // IR-GPU-NESTED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
2968 // IR-GPU-NESTED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2969 // IR-GPU-NESTED: user_code.entry:
2970 // IR-GPU-NESTED-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
2971 // IR-GPU-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
2972 // IR-GPU-NESTED-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
2973 // IR-GPU-NESTED-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
2974 // IR-GPU-NESTED-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
2975 // IR-GPU-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
2976 // IR-GPU-NESTED-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]]
2977 // IR-GPU-NESTED-NEXT: call void @__kmpc_target_deinit()
2978 // IR-GPU-NESTED-NEXT: ret void
2979 // IR-GPU-NESTED: worker.exit:
2980 // IR-GPU-NESTED-NEXT: ret void
2981 //
2982 //
2983 // IR-GPU-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined
2984 // IR-GPU-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] {
2985 // IR-GPU-NESTED-NEXT: entry:
2986 // IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2987 // IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2988 // IR-GPU-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
2989 // IR-GPU-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
2990 // IR-GPU-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2991 // IR-GPU-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
2992 // IR-GPU-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2993 // IR-GPU-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
2994 // IR-GPU-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
2995 // IR-GPU-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
2996 // IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
2997 // IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5)
2998 // IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5)
2999 // IR-GPU-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
3000 // IR-GPU-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
3001 // IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8, addrspace(5)
3002 // IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8, addrspace(5)
3003 // IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
3004 // IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
3005 // IR-GPU-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
3006 // IR-GPU-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
3007 // IR-GPU-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
3008 // IR-GPU-NESTED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5)
3009 // IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
3010 // IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
3011 // IR-GPU-NESTED-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
3012 // IR-GPU-NESTED-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
3013 // IR-GPU-NESTED-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
3014 // IR-GPU-NESTED-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
3015 // IR-GPU-NESTED-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
3016 // IR-GPU-NESTED-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
3017 // IR-GPU-NESTED-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
3018 // IR-GPU-NESTED-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
3019 // IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
3020 // IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr
3021 // IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr
3022 // IR-GPU-NESTED-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
3023 // IR-GPU-NESTED-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
3024 // IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
3025 // IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
3026 // IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
3027 // IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
3028 // IR-GPU-NESTED-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
3029 // IR-GPU-NESTED-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
3030 // IR-GPU-NESTED-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
3031 // IR-GPU-NESTED-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
3032 // IR-GPU-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
3033 // IR-GPU-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
3034 // IR-GPU-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
3035 // IR-GPU-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
3036 // IR-GPU-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
3037 // IR-GPU-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
3038 // IR-GPU-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
3039 // IR-GPU-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
3040 // IR-GPU-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
3041 // IR-GPU-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
3042 // IR-GPU-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
3043 // IR-GPU-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
3044 // IR-GPU-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
3045 // IR-GPU-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
3046 // IR-GPU-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
3047 // IR-GPU-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
3048 // IR-GPU-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
3049 // IR-GPU-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
3050 // IR-GPU-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
3051 // IR-GPU-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
3052 // IR-GPU-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
3053 // IR-GPU-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
3054 // IR-GPU-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
3055 // IR-GPU-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
3056 // IR-GPU-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
3057 // IR-GPU-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
3058 // IR-GPU-NESTED-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
3059 // IR-GPU-NESTED-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
3060 // IR-GPU-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
3061 // IR-GPU-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
3062 // IR-GPU-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
3063 // IR-GPU-NESTED: land.lhs.true:
3064 // IR-GPU-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
3065 // IR-GPU-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
3066 // IR-GPU-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
3067 // IR-GPU-NESTED: omp.precond.then:
3068 // IR-GPU-NESTED-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
3069 // IR-GPU-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
3070 // IR-GPU-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
3071 // IR-GPU-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
3072 // IR-GPU-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
3073 // IR-GPU-NESTED-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
3074 // IR-GPU-NESTED-NEXT: [[CONV13:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64
3075 // IR-GPU-NESTED-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
3076 // IR-GPU-NESTED-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
3077 // IR-GPU-NESTED-NEXT: call void @__kmpc_distribute_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 [[CONV13]])
3078 // IR-GPU-NESTED-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
3079 // IR-GPU-NESTED-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
3080 // IR-GPU-NESTED-NEXT: [[CMP14:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
3081 // IR-GPU-NESTED-NEXT: br i1 [[CMP14]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
3082 // IR-GPU-NESTED: cond.true:
3083 // IR-GPU-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
3084 // IR-GPU-NESTED-NEXT: br label [[COND_END:%.*]]
3085 // IR-GPU-NESTED: cond.false:
3086 // IR-GPU-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
3087 // IR-GPU-NESTED-NEXT: br label [[COND_END]]
3088 // IR-GPU-NESTED: cond.end:
3089 // IR-GPU-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
3090 // IR-GPU-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
3091 // IR-GPU-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
3092 // IR-GPU-NESTED-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV_ASCAST]], align 8
3093 // IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
3094 // IR-GPU-NESTED: omp.inner.for.cond:
3095 // IR-GPU-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
3096 // IR-GPU-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
3097 // IR-GPU-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP19]], 1
3098 // IR-GPU-NESTED-NEXT: [[CMP15:%.*]] = icmp slt i64 [[TMP18]], [[ADD]]
3099 // IR-GPU-NESTED-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
3100 // IR-GPU-NESTED: omp.inner.for.body:
3101 // IR-GPU-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
3102 // IR-GPU-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
3103 // IR-GPU-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
3104 // IR-GPU-NESTED-NEXT: store i32 [[TMP22]], ptr [[N_CASTED_ASCAST]], align 4
3105 // IR-GPU-NESTED-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
3106 // IR-GPU-NESTED-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
3107 // IR-GPU-NESTED-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP20]] to ptr
3108 // IR-GPU-NESTED-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
3109 // IR-GPU-NESTED-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
3110 // IR-GPU-NESTED-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP21]] to ptr
3111 // IR-GPU-NESTED-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8
3112 // IR-GPU-NESTED-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
3113 // IR-GPU-NESTED-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP23]] to ptr
3114 // IR-GPU-NESTED-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8
3115 // IR-GPU-NESTED-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
3116 // IR-GPU-NESTED-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP0]] to ptr
3117 // IR-GPU-NESTED-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8
3118 // IR-GPU-NESTED-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
3119 // IR-GPU-NESTED-NEXT: store ptr [[TMP1]], ptr [[TMP32]], align 8
3120 // IR-GPU-NESTED-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
3121 // IR-GPU-NESTED-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP2]] to ptr
3122 // IR-GPU-NESTED-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8
3123 // IR-GPU-NESTED-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
3124 // IR-GPU-NESTED-NEXT: store ptr [[TMP3]], ptr [[TMP35]], align 8
3125 // IR-GPU-NESTED-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
3126 // IR-GPU-NESTED-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
3127 // IR-GPU-NESTED-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7)
3128 // IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
3129 // IR-GPU-NESTED: omp.inner.for.inc:
3130 // IR-GPU-NESTED-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
3131 // IR-GPU-NESTED-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
3132 // IR-GPU-NESTED-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]]
3133 // IR-GPU-NESTED-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_IV_ASCAST]], align 8
3134 // IR-GPU-NESTED-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
3135 // IR-GPU-NESTED-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
3136 // IR-GPU-NESTED-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP40]], [[TMP41]]
3137 // IR-GPU-NESTED-NEXT: store i64 [[ADD17]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
3138 // IR-GPU-NESTED-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
3139 // IR-GPU-NESTED-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
3140 // IR-GPU-NESTED-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP42]], [[TMP43]]
3141 // IR-GPU-NESTED-NEXT: store i64 [[ADD18]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
3142 // IR-GPU-NESTED-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
3143 // IR-GPU-NESTED-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
3144 // IR-GPU-NESTED-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[TMP44]], [[TMP45]]
3145 // IR-GPU-NESTED-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]]
3146 // IR-GPU-NESTED: cond.true20:
3147 // IR-GPU-NESTED-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
3148 // IR-GPU-NESTED-NEXT: br label [[COND_END22:%.*]]
3149 // IR-GPU-NESTED: cond.false21:
3150 // IR-GPU-NESTED-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
3151 // IR-GPU-NESTED-NEXT: br label [[COND_END22]]
3152 // IR-GPU-NESTED: cond.end22:
3153 // IR-GPU-NESTED-NEXT: [[COND23:%.*]] = phi i64 [ [[TMP46]], [[COND_TRUE20]] ], [ [[TMP47]], [[COND_FALSE21]] ]
3154 // IR-GPU-NESTED-NEXT: store i64 [[COND23]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
3155 // IR-GPU-NESTED-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
3156 // IR-GPU-NESTED-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_ASCAST]], align 8
3157 // IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
3158 // IR-GPU-NESTED: omp.inner.for.end:
3159 // IR-GPU-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
3160 // IR-GPU-NESTED: omp.loop.exit:
3161 // IR-GPU-NESTED-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
3162 // IR-GPU-NESTED-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4
3163 // IR-GPU-NESTED-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP50]])
3164 // IR-GPU-NESTED-NEXT: br label [[OMP_PRECOND_END]]
3165 // IR-GPU-NESTED: omp.precond.end:
3166 // IR-GPU-NESTED-NEXT: ret void
3167 //
3168 //
3169 // IR-GPU-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined_omp_outlined
3170 // IR-GPU-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
3171 // IR-GPU-NESTED-NEXT: entry:
3172 // IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3173 // IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3174 // IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
3175 // IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
3176 // IR-GPU-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
3177 // IR-GPU-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
3178 // IR-GPU-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3179 // IR-GPU-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
3180 // IR-GPU-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3181 // IR-GPU-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
3182 // IR-GPU-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
3183 // IR-GPU-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
3184 // IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
3185 // IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5)
3186 // IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5)
3187 // IR-GPU-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
3188 // IR-GPU-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
3189 // IR-GPU-NESTED-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8, addrspace(5)
3190 // IR-GPU-NESTED-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8, addrspace(5)
3191 // IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
3192 // IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
3193 // IR-GPU-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
3194 // IR-GPU-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
3195 // IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
3196 // IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
3197 // IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
3198 // IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
3199 // IR-GPU-NESTED-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
3200 // IR-GPU-NESTED-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
3201 // IR-GPU-NESTED-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
3202 // IR-GPU-NESTED-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
3203 // IR-GPU-NESTED-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
3204 // IR-GPU-NESTED-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
3205 // IR-GPU-NESTED-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
3206 // IR-GPU-NESTED-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
3207 // IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
3208 // IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr
3209 // IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr
3210 // IR-GPU-NESTED-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
3211 // IR-GPU-NESTED-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
3212 // IR-GPU-NESTED-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
3213 // IR-GPU-NESTED-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
3214 // IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
3215 // IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
3216 // IR-GPU-NESTED-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
3217 // IR-GPU-NESTED-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
3218 // IR-GPU-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
3219 // IR-GPU-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
3220 // IR-GPU-NESTED-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
3221 // IR-GPU-NESTED-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
3222 // IR-GPU-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
3223 // IR-GPU-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
3224 // IR-GPU-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
3225 // IR-GPU-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
3226 // IR-GPU-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
3227 // IR-GPU-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
3228 // IR-GPU-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
3229 // IR-GPU-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
3230 // IR-GPU-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
3231 // IR-GPU-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
3232 // IR-GPU-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
3233 // IR-GPU-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
3234 // IR-GPU-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
3235 // IR-GPU-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
3236 // IR-GPU-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
3237 // IR-GPU-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
3238 // IR-GPU-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
3239 // IR-GPU-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
3240 // IR-GPU-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
3241 // IR-GPU-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
3242 // IR-GPU-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
3243 // IR-GPU-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
3244 // IR-GPU-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
3245 // IR-GPU-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
3246 // IR-GPU-NESTED-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
3247 // IR-GPU-NESTED-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
3248 // IR-GPU-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
3249 // IR-GPU-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
3250 // IR-GPU-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
3251 // IR-GPU-NESTED: land.lhs.true:
3252 // IR-GPU-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
3253 // IR-GPU-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
3254 // IR-GPU-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
3255 // IR-GPU-NESTED: omp.precond.then:
3256 // IR-GPU-NESTED-NEXT: store i64 0, ptr [[DOTOMP_LB_ASCAST]], align 8
3257 // IR-GPU-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
3258 // IR-GPU-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB_ASCAST]], align 8
3259 // IR-GPU-NESTED-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
3260 // IR-GPU-NESTED-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
3261 // IR-GPU-NESTED-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB_ASCAST]], align 8
3262 // IR-GPU-NESTED-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB_ASCAST]], align 8
3263 // IR-GPU-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
3264 // IR-GPU-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
3265 // IR-GPU-NESTED-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
3266 // IR-GPU-NESTED-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
3267 // IR-GPU-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 1)
3268 // IR-GPU-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB_ASCAST]], align 8
3269 // IR-GPU-NESTED-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV_ASCAST]], align 8
3270 // IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
3271 // IR-GPU-NESTED: omp.inner.for.cond:
3272 // IR-GPU-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
3273 // IR-GPU-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
3274 // IR-GPU-NESTED-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]]
3275 // IR-GPU-NESTED-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
3276 // IR-GPU-NESTED: omp.inner.for.body:
3277 // IR-GPU-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
3278 // IR-GPU-NESTED-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
3279 // IR-GPU-NESTED-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP19]], 0
3280 // IR-GPU-NESTED-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
3281 // IR-GPU-NESTED-NEXT: [[MUL16:%.*]] = mul nsw i32 1, [[DIV15]]
3282 // IR-GPU-NESTED-NEXT: [[CONV17:%.*]] = sext i32 [[MUL16]] to i64
3283 // IR-GPU-NESTED-NEXT: [[DIV18:%.*]] = sdiv i64 [[TMP18]], [[CONV17]]
3284 // IR-GPU-NESTED-NEXT: [[MUL19:%.*]] = mul nsw i64 [[DIV18]], 1
3285 // IR-GPU-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL19]]
3286 // IR-GPU-NESTED-NEXT: [[CONV20:%.*]] = trunc i64 [[ADD]] to i32
3287 // IR-GPU-NESTED-NEXT: store i32 [[CONV20]], ptr [[I11_ASCAST]], align 4
3288 // IR-GPU-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
3289 // IR-GPU-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
3290 // IR-GPU-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
3291 // IR-GPU-NESTED-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP22]], 0
3292 // IR-GPU-NESTED-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1
3293 // IR-GPU-NESTED-NEXT: [[MUL23:%.*]] = mul nsw i32 1, [[DIV22]]
3294 // IR-GPU-NESTED-NEXT: [[CONV24:%.*]] = sext i32 [[MUL23]] to i64
3295 // IR-GPU-NESTED-NEXT: [[DIV25:%.*]] = sdiv i64 [[TMP21]], [[CONV24]]
3296 // IR-GPU-NESTED-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
3297 // IR-GPU-NESTED-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP23]], 0
3298 // IR-GPU-NESTED-NEXT: [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
3299 // IR-GPU-NESTED-NEXT: [[MUL28:%.*]] = mul nsw i32 1, [[DIV27]]
3300 // IR-GPU-NESTED-NEXT: [[CONV29:%.*]] = sext i32 [[MUL28]] to i64
3301 // IR-GPU-NESTED-NEXT: [[MUL30:%.*]] = mul nsw i64 [[DIV25]], [[CONV29]]
3302 // IR-GPU-NESTED-NEXT: [[SUB31:%.*]] = sub nsw i64 [[TMP20]], [[MUL30]]
3303 // IR-GPU-NESTED-NEXT: [[MUL32:%.*]] = mul nsw i64 [[SUB31]], 1
3304 // IR-GPU-NESTED-NEXT: [[ADD33:%.*]] = add nsw i64 0, [[MUL32]]
3305 // IR-GPU-NESTED-NEXT: [[CONV34:%.*]] = trunc i64 [[ADD33]] to i32
3306 // IR-GPU-NESTED-NEXT: store i32 [[CONV34]], ptr [[J12_ASCAST]], align 4
3307 // IR-GPU-NESTED-NEXT: [[TMP24:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
3308 // IR-GPU-NESTED-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64
3309 // IR-GPU-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
3310 // IR-GPU-NESTED-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
3311 // IR-GPU-NESTED-NEXT: [[TMP26:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
3312 // IR-GPU-NESTED-NEXT: [[MUL35:%.*]] = mul nsw i32 [[TMP25]], [[TMP26]]
3313 // IR-GPU-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12_ASCAST]], align 4
3314 // IR-GPU-NESTED-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP27]]) #[[ATTR5:[0-9]+]]
3315 // IR-GPU-NESTED-NEXT: [[ADD36:%.*]] = add nsw i32 [[MUL35]], [[CALL]]
3316 // IR-GPU-NESTED-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
3317 // IR-GPU-NESTED-NEXT: [[IDXPROM37:%.*]] = sext i32 [[TMP28]] to i64
3318 // IR-GPU-NESTED-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM37]]
3319 // IR-GPU-NESTED-NEXT: store i32 [[ADD36]], ptr [[ARRAYIDX38]], align 4
3320 // IR-GPU-NESTED-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
3321 // IR-GPU-NESTED: omp.body.continue:
3322 // IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
3323 // IR-GPU-NESTED: omp.inner.for.inc:
3324 // IR-GPU-NESTED-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
3325 // IR-GPU-NESTED-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
3326 // IR-GPU-NESTED-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP29]], [[TMP30]]
3327 // IR-GPU-NESTED-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV_ASCAST]], align 8
3328 // IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
3329 // IR-GPU-NESTED: omp.inner.for.end:
3330 // IR-GPU-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
3331 // IR-GPU-NESTED: omp.loop.exit:
3332 // IR-GPU-NESTED-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
3333 // IR-GPU-NESTED-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
3334 // IR-GPU-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP32]])
3335 // IR-GPU-NESTED-NEXT: br label [[OMP_PRECOND_END]]
3336 // IR-GPU-NESTED: omp.precond.end:
3337 // IR-GPU-NESTED-NEXT: ret void
3338 //
3339 //
3340 // IR-NESTED-LABEL: define {{[^@]+}}@main
3341 // IR-NESTED-SAME: () #[[ATTR0:[0-9]+]] {
3342 // IR-NESTED-NEXT: entry:
3343 // IR-NESTED-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
3344 // IR-NESTED-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
3345 // IR-NESTED-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
3346 // IR-NESTED-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
3347 // IR-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
3348 // IR-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4
3349 // IR-NESTED-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4
3350 // IR-NESTED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
3351 // IR-NESTED-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
3352 // IR-NESTED-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
3353 // IR-NESTED-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16
3354 // IR-NESTED-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
3355 // IR-NESTED-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4
3356 // IR-NESTED-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
3357 // IR-NESTED-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16
3358 // IR-NESTED-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
3359 // IR-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4
3360 // IR-NESTED-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
3361 // IR-NESTED-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
3362 // IR-NESTED-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]]
3363 // IR-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4
3364 // IR-NESTED-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
3365 // IR-NESTED-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP7]])
3366 // IR-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4
3367 // IR-NESTED-NEXT: ret i32 [[TMP8]]
3368 //
3369 //
3370 // IR-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
3371 // IR-NESTED-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
3372 // IR-NESTED-NEXT: entry:
3373 // IR-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
3374 // IR-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
3375 // IR-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
3376 // IR-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
3377 // IR-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
3378 // IR-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
3379 // IR-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
3380 // IR-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
3381 // IR-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
3382 // IR-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
3383 // IR-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
3384 // IR-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
3385 // IR-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
3386 // IR-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
3387 // IR-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
3388 // IR-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
3389 // IR-NESTED-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
3390 // IR-NESTED-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
3391 // IR-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
3392 // IR-NESTED-NEXT: ret void
3393 //
3394 //
3395 // IR-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined
3396 // IR-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
3397 // IR-NESTED-NEXT: entry:
3398 // IR-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3399 // IR-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3400 // IR-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
3401 // IR-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
3402 // IR-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
3403 // IR-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
3404 // IR-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
3405 // IR-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
3406 // IR-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4
3407 // IR-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
3408 // IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
3409 // IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
3410 // IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
3411 // IR-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4
3412 // IR-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4
3413 // IR-NESTED-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
3414 // IR-NESTED-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
3415 // IR-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
3416 // IR-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
3417 // IR-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4
3418 // IR-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4
3419 // IR-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
3420 // IR-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
3421 // IR-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
3422 // IR-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
3423 // IR-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
3424 // IR-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
3425 // IR-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
3426 // IR-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
3427 // IR-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
3428 // IR-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
3429 // IR-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
3430 // IR-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
3431 // IR-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
3432 // IR-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
3433 // IR-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
3434 // IR-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
3435 // IR-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3436 // IR-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
3437 // IR-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
3438 // IR-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
3439 // IR-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3440 // IR-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
3441 // IR-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
3442 // IR-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
3443 // IR-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
3444 // IR-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
3445 // IR-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
3446 // IR-NESTED-NEXT: store i32 0, ptr [[I]], align 4
3447 // IR-NESTED-NEXT: store i32 0, ptr [[J]], align 4
3448 // IR-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3449 // IR-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
3450 // IR-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
3451 // IR-NESTED: land.lhs.true:
3452 // IR-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3453 // IR-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
3454 // IR-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
3455 // IR-NESTED: omp.precond.then:
3456 // IR-NESTED-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
3457 // IR-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
3458 // IR-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8
3459 // IR-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
3460 // IR-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
3461 // IR-NESTED-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
3462 // IR-NESTED-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
3463 // IR-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
3464 // IR-NESTED-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
3465 // IR-NESTED-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
3466 // IR-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
3467 // IR-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
3468 // IR-NESTED: cond.true:
3469 // IR-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
3470 // IR-NESTED-NEXT: br label [[COND_END:%.*]]
3471 // IR-NESTED: cond.false:
3472 // IR-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
3473 // IR-NESTED-NEXT: br label [[COND_END]]
3474 // IR-NESTED: cond.end:
3475 // IR-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
3476 // IR-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
3477 // IR-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
3478 // IR-NESTED-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8
3479 // IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
3480 // IR-NESTED: omp.inner.for.cond:
3481 // IR-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3482 // IR-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
3483 // IR-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]]
3484 // IR-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
3485 // IR-NESTED: omp.inner.for.body:
3486 // IR-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
3487 // IR-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
3488 // IR-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
3489 // IR-NESTED-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4
3490 // IR-NESTED-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8
3491 // IR-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
3492 // IR-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
3493 // IR-NESTED: omp.inner.for.inc:
3494 // IR-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3495 // IR-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
3496 // IR-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]]
3497 // IR-NESTED-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
3498 // IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
3499 // IR-NESTED: omp.inner.for.end:
3500 // IR-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
3501 // IR-NESTED: omp.loop.exit:
3502 // IR-NESTED-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
3503 // IR-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
3504 // IR-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]])
3505 // IR-NESTED-NEXT: br label [[OMP_PRECOND_END]]
3506 // IR-NESTED: omp.precond.end:
3507 // IR-NESTED-NEXT: ret void
3508 //
3509 //
3510 // IR-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined
3511 // IR-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
3512 // IR-NESTED-NEXT: entry:
3513 // IR-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3514 // IR-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3515 // IR-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
3516 // IR-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
3517 // IR-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
3518 // IR-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
3519 // IR-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
3520 // IR-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
3521 // IR-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
3522 // IR-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
3523 // IR-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4
3524 // IR-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
3525 // IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
3526 // IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
3527 // IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
3528 // IR-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4
3529 // IR-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4
3530 // IR-NESTED-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
3531 // IR-NESTED-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
3532 // IR-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
3533 // IR-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
3534 // IR-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4
3535 // IR-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4
3536 // IR-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
3537 // IR-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
3538 // IR-NESTED-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
3539 // IR-NESTED-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
3540 // IR-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
3541 // IR-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
3542 // IR-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
3543 // IR-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
3544 // IR-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
3545 // IR-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
3546 // IR-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
3547 // IR-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
3548 // IR-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
3549 // IR-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
3550 // IR-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
3551 // IR-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
3552 // IR-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
3553 // IR-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3554 // IR-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
3555 // IR-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
3556 // IR-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
3557 // IR-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3558 // IR-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
3559 // IR-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
3560 // IR-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
3561 // IR-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
3562 // IR-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
3563 // IR-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
3564 // IR-NESTED-NEXT: store i32 0, ptr [[I]], align 4
3565 // IR-NESTED-NEXT: store i32 0, ptr [[J]], align 4
3566 // IR-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3567 // IR-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
3568 // IR-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
3569 // IR-NESTED: land.lhs.true:
3570 // IR-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3571 // IR-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
3572 // IR-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
3573 // IR-NESTED: omp.precond.then:
3574 // IR-NESTED-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
3575 // IR-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
3576 // IR-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8
3577 // IR-NESTED-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
3578 // IR-NESTED-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
3579 // IR-NESTED-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8
3580 // IR-NESTED-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8
3581 // IR-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
3582 // IR-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
3583 // IR-NESTED-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
3584 // IR-NESTED-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
3585 // IR-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
3586 // IR-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
3587 // IR-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
3588 // IR-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
3589 // IR-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
3590 // IR-NESTED: cond.true:
3591 // IR-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
3592 // IR-NESTED-NEXT: br label [[COND_END:%.*]]
3593 // IR-NESTED: cond.false:
3594 // IR-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
3595 // IR-NESTED-NEXT: br label [[COND_END]]
3596 // IR-NESTED: cond.end:
3597 // IR-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
3598 // IR-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
3599 // IR-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
3600 // IR-NESTED-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8
3601 // IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
3602 // IR-NESTED: omp.inner.for.cond:
3603 // IR-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3604 // IR-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
3605 // IR-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]]
3606 // IR-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
3607 // IR-NESTED: omp.inner.for.body:
3608 // IR-NESTED-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3609 // IR-NESTED-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3610 // IR-NESTED-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0
3611 // IR-NESTED-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
3612 // IR-NESTED-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
3613 // IR-NESTED-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
3614 // IR-NESTED-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]]
3615 // IR-NESTED-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
3616 // IR-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
3617 // IR-NESTED-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
3618 // IR-NESTED-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
3619 // IR-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3620 // IR-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3621 // IR-NESTED-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3622 // IR-NESTED-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0
3623 // IR-NESTED-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
3624 // IR-NESTED-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
3625 // IR-NESTED-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
3626 // IR-NESTED-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]]
3627 // IR-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3628 // IR-NESTED-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0
3629 // IR-NESTED-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
3630 // IR-NESTED-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
3631 // IR-NESTED-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
3632 // IR-NESTED-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
3633 // IR-NESTED-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]]
3634 // IR-NESTED-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
3635 // IR-NESTED-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
3636 // IR-NESTED-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
3637 // IR-NESTED-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
3638 // IR-NESTED-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4
3639 // IR-NESTED-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64
3640 // IR-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
3641 // IR-NESTED-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
3642 // IR-NESTED-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4
3643 // IR-NESTED-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]]
3644 // IR-NESTED-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
3645 // IR-NESTED-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP31]])
3646 // IR-NESTED-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[CALL]]
3647 // IR-NESTED-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4
3648 // IR-NESTED-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64
3649 // IR-NESTED-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]]
3650 // IR-NESTED-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4
3651 // IR-NESTED-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
3652 // IR-NESTED: omp.body.continue:
3653 // IR-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
3654 // IR-NESTED: omp.inner.for.inc:
3655 // IR-NESTED-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3656 // IR-NESTED-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1
3657 // IR-NESTED-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
3658 // IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
3659 // IR-NESTED: omp.inner.for.end:
3660 // IR-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
3661 // IR-NESTED: omp.loop.exit:
3662 // IR-NESTED-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
3663 // IR-NESTED-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
3664 // IR-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP35]])
3665 // IR-NESTED-NEXT: br label [[OMP_PRECOND_END]]
3666 // IR-NESTED: omp.precond.end:
3667 // IR-NESTED-NEXT: ret void
3668 //
3669 //
3670 // IR-PCH-NESTED-LABEL: define {{[^@]+}}@main
3671 // IR-PCH-NESTED-SAME: () #[[ATTR0:[0-9]+]] {
3672 // IR-PCH-NESTED-NEXT: entry:
3673 // IR-PCH-NESTED-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
3674 // IR-PCH-NESTED-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
3675 // IR-PCH-NESTED-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
3676 // IR-PCH-NESTED-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
3677 // IR-PCH-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
3678 // IR-PCH-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4
3679 // IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4
3680 // IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
3681 // IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
3682 // IR-PCH-NESTED-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
3683 // IR-PCH-NESTED-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16
3684 // IR-PCH-NESTED-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
3685 // IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4
3686 // IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
3687 // IR-PCH-NESTED-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16
3688 // IR-PCH-NESTED-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
3689 // IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4
3690 // IR-PCH-NESTED-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
3691 // IR-PCH-NESTED-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
3692 // IR-PCH-NESTED-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]]
3693 // IR-PCH-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4
3694 // IR-PCH-NESTED-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
3695 // IR-PCH-NESTED-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP7]])
3696 // IR-PCH-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4
3697 // IR-PCH-NESTED-NEXT: ret i32 [[TMP8]]
3698 //
3699 //
3700 // IR-PCH-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
3701 // IR-PCH-NESTED-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
3702 // IR-PCH-NESTED-NEXT: entry:
3703 // IR-PCH-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
3704 // IR-PCH-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
3705 // IR-PCH-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
3706 // IR-PCH-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
3707 // IR-PCH-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
3708 // IR-PCH-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
3709 // IR-PCH-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
3710 // IR-PCH-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
3711 // IR-PCH-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
3712 // IR-PCH-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
3713 // IR-PCH-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
3714 // IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
3715 // IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
3716 // IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
3717 // IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
3718 // IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
3719 // IR-PCH-NESTED-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
3720 // IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
3721 // IR-PCH-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
3722 // IR-PCH-NESTED-NEXT: ret void
3723 //
3724 //
3725 // IR-PCH-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined
3726 // IR-PCH-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
3727 // IR-PCH-NESTED-NEXT: entry:
3728 // IR-PCH-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3729 // IR-PCH-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3730 // IR-PCH-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
3731 // IR-PCH-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
3732 // IR-PCH-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
3733 // IR-PCH-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
3734 // IR-PCH-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
3735 // IR-PCH-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
3736 // IR-PCH-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4
3737 // IR-PCH-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
3738 // IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
3739 // IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
3740 // IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
3741 // IR-PCH-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4
3742 // IR-PCH-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4
3743 // IR-PCH-NESTED-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
3744 // IR-PCH-NESTED-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
3745 // IR-PCH-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
3746 // IR-PCH-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
3747 // IR-PCH-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4
3748 // IR-PCH-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4
3749 // IR-PCH-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
3750 // IR-PCH-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
3751 // IR-PCH-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
3752 // IR-PCH-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
3753 // IR-PCH-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
3754 // IR-PCH-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
3755 // IR-PCH-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
3756 // IR-PCH-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
3757 // IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
3758 // IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
3759 // IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
3760 // IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
3761 // IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
3762 // IR-PCH-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
3763 // IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
3764 // IR-PCH-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
3765 // IR-PCH-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3766 // IR-PCH-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
3767 // IR-PCH-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
3768 // IR-PCH-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
3769 // IR-PCH-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3770 // IR-PCH-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
3771 // IR-PCH-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
3772 // IR-PCH-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
3773 // IR-PCH-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
3774 // IR-PCH-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
3775 // IR-PCH-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
3776 // IR-PCH-NESTED-NEXT: store i32 0, ptr [[I]], align 4
3777 // IR-PCH-NESTED-NEXT: store i32 0, ptr [[J]], align 4
3778 // IR-PCH-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3779 // IR-PCH-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
3780 // IR-PCH-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
3781 // IR-PCH-NESTED: land.lhs.true:
3782 // IR-PCH-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3783 // IR-PCH-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
3784 // IR-PCH-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
3785 // IR-PCH-NESTED: omp.precond.then:
3786 // IR-PCH-NESTED-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
3787 // IR-PCH-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
3788 // IR-PCH-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8
3789 // IR-PCH-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
3790 // IR-PCH-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
3791 // IR-PCH-NESTED-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
3792 // IR-PCH-NESTED-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
3793 // IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
3794 // IR-PCH-NESTED-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
3795 // IR-PCH-NESTED-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
3796 // IR-PCH-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
3797 // IR-PCH-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
3798 // IR-PCH-NESTED: cond.true:
3799 // IR-PCH-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
3800 // IR-PCH-NESTED-NEXT: br label [[COND_END:%.*]]
3801 // IR-PCH-NESTED: cond.false:
3802 // IR-PCH-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
3803 // IR-PCH-NESTED-NEXT: br label [[COND_END]]
3804 // IR-PCH-NESTED: cond.end:
3805 // IR-PCH-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
3806 // IR-PCH-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
3807 // IR-PCH-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
3808 // IR-PCH-NESTED-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8
3809 // IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
3810 // IR-PCH-NESTED: omp.inner.for.cond:
3811 // IR-PCH-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3812 // IR-PCH-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
3813 // IR-PCH-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]]
3814 // IR-PCH-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
3815 // IR-PCH-NESTED: omp.inner.for.body:
3816 // IR-PCH-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
3817 // IR-PCH-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
3818 // IR-PCH-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
3819 // IR-PCH-NESTED-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4
3820 // IR-PCH-NESTED-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8
3821 // IR-PCH-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
3822 // IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
3823 // IR-PCH-NESTED: omp.inner.for.inc:
3824 // IR-PCH-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3825 // IR-PCH-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
3826 // IR-PCH-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]]
3827 // IR-PCH-NESTED-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
3828 // IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
3829 // IR-PCH-NESTED: omp.inner.for.end:
3830 // IR-PCH-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
3831 // IR-PCH-NESTED: omp.loop.exit:
3832 // IR-PCH-NESTED-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
3833 // IR-PCH-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
3834 // IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]])
3835 // IR-PCH-NESTED-NEXT: br label [[OMP_PRECOND_END]]
3836 // IR-PCH-NESTED: omp.precond.end:
3837 // IR-PCH-NESTED-NEXT: ret void
3838 //
3839 //
3840 // IR-PCH-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined
3841 // IR-PCH-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
3842 // IR-PCH-NESTED-NEXT: entry:
3843 // IR-PCH-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3844 // IR-PCH-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3845 // IR-PCH-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
3846 // IR-PCH-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
3847 // IR-PCH-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
3848 // IR-PCH-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
3849 // IR-PCH-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
3850 // IR-PCH-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
3851 // IR-PCH-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
3852 // IR-PCH-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
3853 // IR-PCH-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4
3854 // IR-PCH-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
3855 // IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
3856 // IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
3857 // IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
3858 // IR-PCH-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4
3859 // IR-PCH-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4
3860 // IR-PCH-NESTED-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
3861 // IR-PCH-NESTED-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
3862 // IR-PCH-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
3863 // IR-PCH-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
3864 // IR-PCH-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4
3865 // IR-PCH-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4
3866 // IR-PCH-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
3867 // IR-PCH-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
3868 // IR-PCH-NESTED-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
3869 // IR-PCH-NESTED-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
3870 // IR-PCH-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
3871 // IR-PCH-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
3872 // IR-PCH-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
3873 // IR-PCH-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
3874 // IR-PCH-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
3875 // IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
3876 // IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
3877 // IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
3878 // IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
3879 // IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
3880 // IR-PCH-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
3881 // IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
3882 // IR-PCH-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
3883 // IR-PCH-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3884 // IR-PCH-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
3885 // IR-PCH-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
3886 // IR-PCH-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
3887 // IR-PCH-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3888 // IR-PCH-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
3889 // IR-PCH-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
3890 // IR-PCH-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
3891 // IR-PCH-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
3892 // IR-PCH-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
3893 // IR-PCH-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
3894 // IR-PCH-NESTED-NEXT: store i32 0, ptr [[I]], align 4
3895 // IR-PCH-NESTED-NEXT: store i32 0, ptr [[J]], align 4
3896 // IR-PCH-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
3897 // IR-PCH-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
3898 // IR-PCH-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
3899 // IR-PCH-NESTED: land.lhs.true:
3900 // IR-PCH-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3901 // IR-PCH-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
3902 // IR-PCH-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
3903 // IR-PCH-NESTED: omp.precond.then:
3904 // IR-PCH-NESTED-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
3905 // IR-PCH-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
3906 // IR-PCH-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8
3907 // IR-PCH-NESTED-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
3908 // IR-PCH-NESTED-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
3909 // IR-PCH-NESTED-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8
3910 // IR-PCH-NESTED-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8
3911 // IR-PCH-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
3912 // IR-PCH-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
3913 // IR-PCH-NESTED-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
3914 // IR-PCH-NESTED-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
3915 // IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
3916 // IR-PCH-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
3917 // IR-PCH-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
3918 // IR-PCH-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
3919 // IR-PCH-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
3920 // IR-PCH-NESTED: cond.true:
3921 // IR-PCH-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
3922 // IR-PCH-NESTED-NEXT: br label [[COND_END:%.*]]
3923 // IR-PCH-NESTED: cond.false:
3924 // IR-PCH-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
3925 // IR-PCH-NESTED-NEXT: br label [[COND_END]]
3926 // IR-PCH-NESTED: cond.end:
3927 // IR-PCH-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
3928 // IR-PCH-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
3929 // IR-PCH-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
3930 // IR-PCH-NESTED-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8
3931 // IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
3932 // IR-PCH-NESTED: omp.inner.for.cond:
3933 // IR-PCH-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3934 // IR-PCH-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
3935 // IR-PCH-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]]
3936 // IR-PCH-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
3937 // IR-PCH-NESTED: omp.inner.for.body:
3938 // IR-PCH-NESTED-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3939 // IR-PCH-NESTED-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3940 // IR-PCH-NESTED-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0
3941 // IR-PCH-NESTED-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
3942 // IR-PCH-NESTED-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
3943 // IR-PCH-NESTED-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
3944 // IR-PCH-NESTED-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]]
3945 // IR-PCH-NESTED-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
3946 // IR-PCH-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
3947 // IR-PCH-NESTED-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
3948 // IR-PCH-NESTED-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
3949 // IR-PCH-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3950 // IR-PCH-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3951 // IR-PCH-NESTED-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3952 // IR-PCH-NESTED-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0
3953 // IR-PCH-NESTED-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
3954 // IR-PCH-NESTED-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
3955 // IR-PCH-NESTED-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
3956 // IR-PCH-NESTED-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]]
3957 // IR-PCH-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
3958 // IR-PCH-NESTED-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0
3959 // IR-PCH-NESTED-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
3960 // IR-PCH-NESTED-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
3961 // IR-PCH-NESTED-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
3962 // IR-PCH-NESTED-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
3963 // IR-PCH-NESTED-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]]
3964 // IR-PCH-NESTED-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
3965 // IR-PCH-NESTED-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
3966 // IR-PCH-NESTED-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
3967 // IR-PCH-NESTED-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
3968 // IR-PCH-NESTED-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4
3969 // IR-PCH-NESTED-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64
3970 // IR-PCH-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
3971 // IR-PCH-NESTED-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
3972 // IR-PCH-NESTED-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4
3973 // IR-PCH-NESTED-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]]
3974 // IR-PCH-NESTED-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
3975 // IR-PCH-NESTED-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP31]])
3976 // IR-PCH-NESTED-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[CALL]]
3977 // IR-PCH-NESTED-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4
3978 // IR-PCH-NESTED-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64
3979 // IR-PCH-NESTED-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]]
3980 // IR-PCH-NESTED-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4
3981 // IR-PCH-NESTED-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
3982 // IR-PCH-NESTED: omp.body.continue:
3983 // IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
3984 // IR-PCH-NESTED: omp.inner.for.inc:
3985 // IR-PCH-NESTED-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
3986 // IR-PCH-NESTED-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1
3987 // IR-PCH-NESTED-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
3988 // IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
3989 // IR-PCH-NESTED: omp.inner.for.end:
3990 // IR-PCH-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
3991 // IR-PCH-NESTED: omp.loop.exit:
3992 // IR-PCH-NESTED-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
3993 // IR-PCH-NESTED-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
3994 // IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP35]])
3995 // IR-PCH-NESTED-NEXT: br label [[OMP_PRECOND_END]]
3996 // IR-PCH-NESTED: omp.precond.end:
3997 // IR-PCH-NESTED-NEXT: ret void
3998 //
3999