xref: /llvm-project/clang/test/OpenMP/target_firstprivate_codegen.cpp (revision 94473f4db6a6f5f12d7c4081455b5b596094eac5)
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
2 // Test host codegen.
3 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK0
4 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
5 // RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK1
6 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK2
7 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
8 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK3
9 
10 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
11 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
12 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY01 %s
13 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY02 %s
14 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
15 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY03 %s
16 
17 // Test target codegen - host bc file has to be created first.
18 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
19 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK
20 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
21 // RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK1
22 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
23 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK2
24 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
25 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK3
26 
27 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
28 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
29 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
30 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY11 %s
31 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
32 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY12 %s
33 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
34 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY13 %s
35 
36 // expected-no-diagnostics
37 #ifndef HEADER
38 #define HEADER
39 
40 template <typename tx, typename ty>
41 struct TT { // simple aggregate used to test firstprivate of a struct (mapped as one base_ptr/ptr/size entry)
42   tx X;
43   ty Y;
44 };
45 #pragma omp declare target
46 int ga = 5; // device-visible global; still listed in a firstprivate clause below
47 #pragma omp end declare target
48 
49 
50 
51 
52 int foo(int n, double *ptr) { // host wrapper: sets up offload arrays and launches three target regions
53   int a = 0;
54   short aa = 0;
55   float b[10];
56   float bn[n];                 // VLA: mapping it emits an extra "n" size entry
57   double c[5][10];
58   double cn[5][n];             // 2-D VLA: mapping it emits "5" and "n" entries
59   TT<long long, char> d;
60   const TT<int, int> e = {n, n};
61   int *p __attribute__ ((aligned (64))) = &a; // over-aligned pointer, captured by value
62 
63 #pragma omp target firstprivate(a, p, ga)
64   {
65   }
66 
67   // a is passed by value to tgt_target
68 
69 
70 #pragma omp target firstprivate(aa, b, bn, c, cn, d)
71   {
72     aa += 1;
73     b[2] = 1.0;
74     bn[3] = 1.0;
75     c[1][2] = 1.0;
76     cn[1][3] = 1.0;
77     d.X = 1;
78     d.Y = 1;
79   }
80 
81 
82   // firstprivate(aa) --> base_ptr = aa, ptr = aa, size = 2 (short)
83 
84   // firstprivate(b): base_ptr = &b[0], ptr = &b[0], size = 40 (sizeof(float)*10)
85 
86   // firstprivate(bn), 2 entries, n and bn: (1) base_ptr = n, ptr = n, size = 8 ; (2) base_ptr = &bn[0], ptr = &bn[0], size = n*sizeof(float)
87 
88   // firstprivate(c): base_ptr = &c[0], ptr = &c[0], size = 400 (5*10*sizeof(double))
89 
90   // firstprivate(cn), 3 entries, 5, n, cn: (1) base_ptr = 5, ptr = 5, size = 8; (2) base_ptr = n, ptr = n, size = 8; (3) base_ptr = &cn[0], ptr = &cn[0], size = 5*n*sizeof(double)
91 
92   // firstprivate(d): base_ptr = &d, ptr = &d, size = 16
93 
94 
95   // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the
96   // target region
97 
98   // firstprivate(aa): aa_priv = aa_in
99 
100   //  firstprivate(b): memcpy(b_priv,b_in)
101 
102 
103   // firstprivate(bn)
104 
105   // firstprivate(c)
106 
107   // firstprivate(cn)
108 
109   // firstprivate(d)
110 
111 #pragma omp target firstprivate(ptr, e)
112   {
113     ptr[0] = e.X;
114     ptr[0]++;
115   }
116 
117 
118 
119 
120   return a;
121 }
122 
123 template <typename tx>
124 tx ftemplate(int n) { // template case: instantiated as ftemplate<int> from bar()
125   tx a = 0;
126   tx b[10];
127 
128 #pragma omp target firstprivate(a, b)
129   {
130     a += 1;
131     b[2] += 1;
132   }
133 
134   return a;
135 }
136 
137 static int fstatic(int n) { // internal-linkage case: scalar, char, and array firstprivates
138   int a = 0;
139   char aaa = 0;
140   int b[10];
141 
142 #pragma omp target firstprivate(a, aaa, b)
143   {
144     a += 1;
145     aaa += 1;
146     b[2] += 1;
147   }
148 
149   return a;
150 }
151 
152 
153 // firstprivate(a): a_priv = a_in
154 
155 // firstprivate(aaa)
156 
157 // firstprivate(b)
158 
159 
160 struct S1 {
161   double a; // written inside the target region through the implicitly mapped this
162 
163   int r1(int n) {
164     int b = n + 1;
165     short int c[2][n]; // member-function VLA: 2, n, and c entries in the offload arrays
166 
167 #pragma omp target firstprivate(b, c)
168     {
169       this->a = (double)b + 1.5;
170       c[1][1] = ++a;
171     }
172 
173     return c[1][1] + (int)b;
174   }
175 
176   // on the host side, we first generate r1, then the static function and the template above
177 
178   // map(this): this ptr is implicitly captured (not a firstprivate matter)
179 
180   // firstprivate(b): base_ptr = b, ptr = b, size = 4 (pass by-value)
181 
182   // firstprivate(c), 3 entries: 2, n, c
183 
184   // only check that we use the map types stored in the global variable
185 
186 
187 
188   // firstprivate(b)
189 
190 
191   // firstprivate(c)
192 
193   // finish
194 
195   // static host function
196 
197   // firstprivate(a): by value
198 
199   // firstprivate(aaa): by value
200 
201   // firstprivate(b): base_ptr = &b[0], ptr= &b[0]
202 
203   // only check that the right sizes and map types are used
204 };
205 
206 int bar(int n, double *ptr) { // driver: forces emission of foo, S1::r1, fstatic, and ftemplate<int>
207   int a = 0;
208   a += foo(n, ptr);
209   S1 S;
210   a += S.r1(n);
211   a += fstatic(n);
212   a += ftemplate<int>(n);
213 
214   return a;
215 }
216 
217 // template host and device
218 
219 
220 // firstprivate(a): by value
221 
222 // firstprivate(b): pointer
223 
224 
225 
226 // firstprivate(a)
227 
228 // firstprivate(b)
229 
230 
231 #endif
232 // CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
233 // CHECK-SAME: () #[[ATTR5:[0-9]+]] {
234 // CHECK-NEXT:  entry:
235 // CHECK-NEXT:    call void @__tgt_register_requires(i64 1)
236 // CHECK-NEXT:    ret void
237 // CHECK-64-LABEL: define {{[^@]+}}@_Z3fooiPd
238 // CHECK-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
239 // CHECK-64-NEXT:  entry:
240 // CHECK-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
241 // CHECK-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
242 // CHECK-64-NEXT:    [[A:%.*]] = alloca i32, align 4
243 // CHECK-64-NEXT:    [[AA:%.*]] = alloca i16, align 2
244 // CHECK-64-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
245 // CHECK-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
246 // CHECK-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
247 // CHECK-64-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
248 // CHECK-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
249 // CHECK-64-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
250 // CHECK-64-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
251 // CHECK-64-NEXT:    [[P:%.*]] = alloca i32*, align 64
252 // CHECK-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
253 // CHECK-64-NEXT:    [[GA_CASTED:%.*]] = alloca i64, align 8
254 // CHECK-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
255 // CHECK-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
256 // CHECK-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
257 // CHECK-64-NEXT:    [[AA_CASTED:%.*]] = alloca i64, align 8
258 // CHECK-64-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8
259 // CHECK-64-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8
260 // CHECK-64-NEXT:    [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8
261 // CHECK-64-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
262 // CHECK-64-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8
263 // CHECK-64-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8
264 // CHECK-64-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8
265 // CHECK-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
266 // CHECK-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
267 // CHECK-64-NEXT:    store i32 0, i32* [[A]], align 4
268 // CHECK-64-NEXT:    store i16 0, i16* [[AA]], align 2
269 // CHECK-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
270 // CHECK-64-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
271 // CHECK-64-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
272 // CHECK-64-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
273 // CHECK-64-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
274 // CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
275 // CHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
276 // CHECK-64-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
277 // CHECK-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
278 // CHECK-64-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
279 // CHECK-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
280 // CHECK-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
281 // CHECK-64-NEXT:    [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
282 // CHECK-64-NEXT:    store i32 [[TMP6]], i32* [[X]], align 4
283 // CHECK-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
284 // CHECK-64-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
285 // CHECK-64-NEXT:    store i32 [[TMP7]], i32* [[Y]], align 4
286 // CHECK-64-NEXT:    store i32* [[A]], i32** [[P]], align 64
287 // CHECK-64-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
288 // CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
289 // CHECK-64-NEXT:    store i32 [[TMP8]], i32* [[CONV]], align 4
290 // CHECK-64-NEXT:    [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8
291 // CHECK-64-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[P]], align 64
292 // CHECK-64-NEXT:    [[TMP11:%.*]] = load i32, i32* @ga, align 4
293 // CHECK-64-NEXT:    [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32*
294 // CHECK-64-NEXT:    store i32 [[TMP11]], i32* [[CONV2]], align 4
295 // CHECK-64-NEXT:    [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8
296 // CHECK-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
297 // CHECK-64-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
298 // CHECK-64-NEXT:    store i64 [[TMP9]], i64* [[TMP14]], align 8
299 // CHECK-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
300 // CHECK-64-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
301 // CHECK-64-NEXT:    store i64 [[TMP9]], i64* [[TMP16]], align 8
302 // CHECK-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
303 // CHECK-64-NEXT:    store i8* null, i8** [[TMP17]], align 8
304 // CHECK-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
305 // CHECK-64-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
306 // CHECK-64-NEXT:    store i32* [[TMP10]], i32** [[TMP19]], align 8
307 // CHECK-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
308 // CHECK-64-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
309 // CHECK-64-NEXT:    store i32* [[TMP10]], i32** [[TMP21]], align 8
310 // CHECK-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
311 // CHECK-64-NEXT:    store i8* null, i8** [[TMP22]], align 8
312 // CHECK-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
313 // CHECK-64-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
314 // CHECK-64-NEXT:    store i64 [[TMP12]], i64* [[TMP24]], align 8
315 // CHECK-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
316 // CHECK-64-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
317 // CHECK-64-NEXT:    store i64 [[TMP12]], i64* [[TMP26]], align 8
318 // CHECK-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
319 // CHECK-64-NEXT:    store i8* null, i8** [[TMP27]], align 8
320 // CHECK-64-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
321 // CHECK-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
322 // CHECK-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
323 // CHECK-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
324 // CHECK-64-NEXT:    store i32 2, i32* [[TMP30]], align 4
325 // CHECK-64-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
326 // CHECK-64-NEXT:    store i32 3, i32* [[TMP31]], align 4
327 // CHECK-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
328 // CHECK-64-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
329 // CHECK-64-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
330 // CHECK-64-NEXT:    store i8** [[TMP29]], i8*** [[TMP33]], align 8
331 // CHECK-64-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
332 // CHECK-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8
333 // CHECK-64-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
334 // CHECK-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8
335 // CHECK-64-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
336 // CHECK-64-NEXT:    store i8** null, i8*** [[TMP36]], align 8
337 // CHECK-64-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
338 // CHECK-64-NEXT:    store i8** null, i8*** [[TMP37]], align 8
339 // CHECK-64-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
340 // CHECK-64-NEXT:    store i64 0, i64* [[TMP38]], align 8
341 // CHECK-64-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
342 // CHECK-64-NEXT:    store i64 0, i64* [[TMP39]], align 8
343 // CHECK-64-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
344 // CHECK-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4
345 // CHECK-64-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
346 // CHECK-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4
347 // CHECK-64-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
348 // CHECK-64-NEXT:    store i32 0, i32* [[TMP42]], align 4
349 // CHECK-64-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
350 // CHECK-64-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
351 // CHECK-64-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
352 // CHECK-64:       omp_offload.failed:
353 // CHECK-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
354 // CHECK-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
355 // CHECK-64:       omp_offload.cont:
356 // CHECK-64-NEXT:    [[TMP45:%.*]] = load i16, i16* [[AA]], align 2
357 // CHECK-64-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
358 // CHECK-64-NEXT:    store i16 [[TMP45]], i16* [[CONV3]], align 2
359 // CHECK-64-NEXT:    [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8
360 // CHECK-64-NEXT:    [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4
361 // CHECK-64-NEXT:    [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]]
362 // CHECK-64-NEXT:    [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8
363 // CHECK-64-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
364 // CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false)
365 // CHECK-64-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
366 // CHECK-64-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
367 // CHECK-64-NEXT:    store i64 [[TMP46]], i64* [[TMP52]], align 8
368 // CHECK-64-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
369 // CHECK-64-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
370 // CHECK-64-NEXT:    store i64 [[TMP46]], i64* [[TMP54]], align 8
371 // CHECK-64-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
372 // CHECK-64-NEXT:    store i8* null, i8** [[TMP55]], align 8
373 // CHECK-64-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
374 // CHECK-64-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
375 // CHECK-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8
376 // CHECK-64-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
377 // CHECK-64-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
378 // CHECK-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8
379 // CHECK-64-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1
380 // CHECK-64-NEXT:    store i8* null, i8** [[TMP60]], align 8
381 // CHECK-64-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
382 // CHECK-64-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
383 // CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[TMP62]], align 8
384 // CHECK-64-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
385 // CHECK-64-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
386 // CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[TMP64]], align 8
387 // CHECK-64-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2
388 // CHECK-64-NEXT:    store i8* null, i8** [[TMP65]], align 8
389 // CHECK-64-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
390 // CHECK-64-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
391 // CHECK-64-NEXT:    store float* [[VLA]], float** [[TMP67]], align 8
392 // CHECK-64-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
393 // CHECK-64-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
394 // CHECK-64-NEXT:    store float* [[VLA]], float** [[TMP69]], align 8
395 // CHECK-64-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
396 // CHECK-64-NEXT:    store i64 [[TMP47]], i64* [[TMP70]], align 8
397 // CHECK-64-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3
398 // CHECK-64-NEXT:    store i8* null, i8** [[TMP71]], align 8
399 // CHECK-64-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4
400 // CHECK-64-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
401 // CHECK-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8
402 // CHECK-64-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4
403 // CHECK-64-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
404 // CHECK-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8
405 // CHECK-64-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4
406 // CHECK-64-NEXT:    store i8* null, i8** [[TMP76]], align 8
407 // CHECK-64-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5
408 // CHECK-64-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
409 // CHECK-64-NEXT:    store i64 5, i64* [[TMP78]], align 8
410 // CHECK-64-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5
411 // CHECK-64-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
412 // CHECK-64-NEXT:    store i64 5, i64* [[TMP80]], align 8
413 // CHECK-64-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5
414 // CHECK-64-NEXT:    store i8* null, i8** [[TMP81]], align 8
415 // CHECK-64-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6
416 // CHECK-64-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
417 // CHECK-64-NEXT:    store i64 [[TMP4]], i64* [[TMP83]], align 8
418 // CHECK-64-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6
419 // CHECK-64-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
420 // CHECK-64-NEXT:    store i64 [[TMP4]], i64* [[TMP85]], align 8
421 // CHECK-64-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6
422 // CHECK-64-NEXT:    store i8* null, i8** [[TMP86]], align 8
423 // CHECK-64-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7
424 // CHECK-64-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
425 // CHECK-64-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 8
426 // CHECK-64-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7
427 // CHECK-64-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
428 // CHECK-64-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 8
429 // CHECK-64-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
430 // CHECK-64-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 8
431 // CHECK-64-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7
432 // CHECK-64-NEXT:    store i8* null, i8** [[TMP92]], align 8
433 // CHECK-64-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8
434 // CHECK-64-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
435 // CHECK-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8
436 // CHECK-64-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8
437 // CHECK-64-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
438 // CHECK-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8
439 // CHECK-64-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8
440 // CHECK-64-NEXT:    store i8* null, i8** [[TMP97]], align 8
441 // CHECK-64-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
442 // CHECK-64-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
443 // CHECK-64-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
444 // CHECK-64-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
445 // CHECK-64-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0
446 // CHECK-64-NEXT:    store i32 2, i32* [[TMP101]], align 4
447 // CHECK-64-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1
448 // CHECK-64-NEXT:    store i32 9, i32* [[TMP102]], align 4
449 // CHECK-64-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2
450 // CHECK-64-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 8
451 // CHECK-64-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3
452 // CHECK-64-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 8
453 // CHECK-64-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4
454 // CHECK-64-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 8
455 // CHECK-64-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5
456 // CHECK-64-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8
457 // CHECK-64-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6
458 // CHECK-64-NEXT:    store i8** null, i8*** [[TMP107]], align 8
459 // CHECK-64-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7
460 // CHECK-64-NEXT:    store i8** null, i8*** [[TMP108]], align 8
461 // CHECK-64-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8
462 // CHECK-64-NEXT:    store i64 0, i64* [[TMP109]], align 8
463 // CHECK-64-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9
464 // CHECK-64-NEXT:    store i64 0, i64* [[TMP110]], align 8
465 // CHECK-64-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10
466 // CHECK-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
467 // CHECK-64-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11
468 // CHECK-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
469 // CHECK-64-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12
470 // CHECK-64-NEXT:    store i32 0, i32* [[TMP113]], align 4
471 // CHECK-64-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]])
472 // CHECK-64-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
473 // CHECK-64-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
474 // CHECK-64:       omp_offload.failed8:
475 // CHECK-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
476 // CHECK-64-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
477 // CHECK-64:       omp_offload.cont9:
478 // CHECK-64-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8
479 // CHECK-64-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
480 // CHECK-64-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
481 // CHECK-64-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 8
482 // CHECK-64-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
483 // CHECK-64-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
484 // CHECK-64-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 8
485 // CHECK-64-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
486 // CHECK-64-NEXT:    store i8* null, i8** [[TMP121]], align 8
487 // CHECK-64-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
488 // CHECK-64-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
489 // CHECK-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8
490 // CHECK-64-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
491 // CHECK-64-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
492 // CHECK-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8
493 // CHECK-64-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
494 // CHECK-64-NEXT:    store i8* null, i8** [[TMP126]], align 8
495 // CHECK-64-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
496 // CHECK-64-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
497 // CHECK-64-NEXT:    [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
498 // CHECK-64-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0
499 // CHECK-64-NEXT:    store i32 2, i32* [[TMP129]], align 4
500 // CHECK-64-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1
501 // CHECK-64-NEXT:    store i32 2, i32* [[TMP130]], align 4
502 // CHECK-64-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2
503 // CHECK-64-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 8
504 // CHECK-64-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3
505 // CHECK-64-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 8
506 // CHECK-64-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4
507 // CHECK-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8
508 // CHECK-64-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5
509 // CHECK-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8
510 // CHECK-64-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6
511 // CHECK-64-NEXT:    store i8** null, i8*** [[TMP135]], align 8
512 // CHECK-64-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7
513 // CHECK-64-NEXT:    store i8** null, i8*** [[TMP136]], align 8
514 // CHECK-64-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8
515 // CHECK-64-NEXT:    store i64 0, i64* [[TMP137]], align 8
516 // CHECK-64-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9
517 // CHECK-64-NEXT:    store i64 0, i64* [[TMP138]], align 8
518 // CHECK-64-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
519 // CHECK-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
520 // CHECK-64-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
521 // CHECK-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
522 // CHECK-64-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
523 // CHECK-64-NEXT:    store i32 0, i32* [[TMP141]], align 4
524 // CHECK-64-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
525 // CHECK-64-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
526 // CHECK-64-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
527 // CHECK-64:       omp_offload.failed14:
528 // CHECK-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
529 // CHECK-64-NEXT:    br label [[OMP_OFFLOAD_CONT15]]
530 // CHECK-64:       omp_offload.cont15:
531 // CHECK-64-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
532 // CHECK-64-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
533 // CHECK-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
534 // CHECK-64-NEXT:    ret i32 [[TMP144]]
535 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
536 // CHECK-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
537 // CHECK-64-NEXT:  entry:
538 // CHECK-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
539 // CHECK-64-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
540 // CHECK-64-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
541 // CHECK-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
542 // CHECK-64-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
543 // CHECK-64-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
544 // CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
545 // CHECK-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
546 // CHECK-64-NEXT:    ret void
547 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
548 // CHECK-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
549 // CHECK-64-NEXT:  entry:
550 // CHECK-64-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
551 // CHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
552 // CHECK-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
553 // CHECK-64-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
554 // CHECK-64-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
555 // CHECK-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
556 // CHECK-64-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
557 // CHECK-64-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
558 // CHECK-64-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
559 // CHECK-64-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
560 // CHECK-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
561 // CHECK-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
562 // CHECK-64-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
563 // CHECK-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
564 // CHECK-64-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
565 // CHECK-64-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
566 // CHECK-64-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
567 // CHECK-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
568 // CHECK-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
569 // CHECK-64-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
570 // CHECK-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
571 // CHECK-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
572 // CHECK-64-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
573 // CHECK-64-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
574 // CHECK-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
575 // CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
576 // CHECK-64-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
577 // CHECK-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
578 // CHECK-64-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
579 // CHECK-64-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
580 // CHECK-64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
581 // CHECK-64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
582 // CHECK-64-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
583 // CHECK-64-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
584 // CHECK-64-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
585 // CHECK-64-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
586 // CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
587 // CHECK-64-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
588 // CHECK-64-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
589 // CHECK-64-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
590 // CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
591 // CHECK-64-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
592 // CHECK-64-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
593 // CHECK-64-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
594 // CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
595 // CHECK-64-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
596 // CHECK-64-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
597 // CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
598 // CHECK-64-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
599 // CHECK-64-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
600 // CHECK-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
601 // CHECK-64-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
602 // CHECK-64-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
603 // CHECK-64-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
604 // CHECK-64-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
605 // CHECK-64-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
606 // CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
607 // CHECK-64-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
608 // CHECK-64-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
609 // CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
610 // CHECK-64-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
611 // CHECK-64-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
612 // CHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
613 // CHECK-64-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
614 // CHECK-64-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
615 // CHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
616 // CHECK-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
617 // CHECK-64-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
618 // CHECK-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
619 // CHECK-64-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
620 // CHECK-64-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
621 // CHECK-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
622 // CHECK-64-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
623 // CHECK-64-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
624 // CHECK-64-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
625 // CHECK-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
626 // CHECK-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
627 // CHECK-64-NEXT:    store i64 1, i64* [[X]], align 8
628 // CHECK-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
629 // CHECK-64-NEXT:    store i8 1, i8* [[Y]], align 8
630 // CHECK-64-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
631 // CHECK-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
632 // CHECK-64-NEXT:    ret void
633 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
634 // CHECK-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
635 // CHECK-64-NEXT:  entry:
636 // CHECK-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
637 // CHECK-64-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
638 // CHECK-64-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
639 // CHECK-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
640 // CHECK-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
641 // CHECK-64-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
642 // CHECK-64-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
643 // CHECK-64-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
644 // CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
645 // CHECK-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
646 // CHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
647 // CHECK-64-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
648 // CHECK-64-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
649 // CHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
650 // CHECK-64-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
651 // CHECK-64-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
652 // CHECK-64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
653 // CHECK-64-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
654 // CHECK-64-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
655 // CHECK-64-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 8
656 // CHECK-64-NEXT:    ret void
657 // CHECK-64-LABEL: define {{[^@]+}}@_Z3bariPd
658 // CHECK-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
659 // CHECK-64-NEXT:  entry:
660 // CHECK-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
661 // CHECK-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
662 // CHECK-64-NEXT:    [[A:%.*]] = alloca i32, align 4
663 // CHECK-64-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
664 // CHECK-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
665 // CHECK-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
666 // CHECK-64-NEXT:    store i32 0, i32* [[A]], align 4
667 // CHECK-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
668 // CHECK-64-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
669 // CHECK-64-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
670 // CHECK-64-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
671 // CHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
672 // CHECK-64-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
673 // CHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
674 // CHECK-64-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
675 // CHECK-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
676 // CHECK-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
677 // CHECK-64-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
678 // CHECK-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
679 // CHECK-64-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
680 // CHECK-64-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
681 // CHECK-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
682 // CHECK-64-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
683 // CHECK-64-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
684 // CHECK-64-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
685 // CHECK-64-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
686 // CHECK-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
687 // CHECK-64-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
688 // CHECK-64-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
689 // CHECK-64-NEXT:    ret i32 [[TMP9]]
690 // CHECK-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
691 // CHECK-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
692 // CHECK-64-NEXT:  entry:
693 // CHECK-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
694 // CHECK-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
695 // CHECK-64-NEXT:    [[B:%.*]] = alloca i32, align 4
696 // CHECK-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
697 // CHECK-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
698 // CHECK-64-NEXT:    [[B_CASTED:%.*]] = alloca i64, align 8
699 // CHECK-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
700 // CHECK-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
701 // CHECK-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
702 // CHECK-64-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
703 // CHECK-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
704 // CHECK-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
705 // CHECK-64-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
706 // CHECK-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
707 // CHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
708 // CHECK-64-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
709 // CHECK-64-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
710 // CHECK-64-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
711 // CHECK-64-NEXT:    [[TMP3:%.*]] = call i8* @llvm.stacksave()
712 // CHECK-64-NEXT:    store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
713 // CHECK-64-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
714 // CHECK-64-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
715 // CHECK-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
716 // CHECK-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B]], align 4
717 // CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
718 // CHECK-64-NEXT:    store i32 [[TMP5]], i32* [[CONV]], align 4
719 // CHECK-64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
720 // CHECK-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
721 // CHECK-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
722 // CHECK-64-NEXT:    [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
723 // CHECK-64-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
724 // CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
725 // CHECK-64-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
726 // CHECK-64-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
727 // CHECK-64-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
728 // CHECK-64-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
729 // CHECK-64-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
730 // CHECK-64-NEXT:    store double* [[A]], double** [[TMP13]], align 8
731 // CHECK-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
732 // CHECK-64-NEXT:    store i8* null, i8** [[TMP14]], align 8
733 // CHECK-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
734 // CHECK-64-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
735 // CHECK-64-NEXT:    store i64 [[TMP6]], i64* [[TMP16]], align 8
736 // CHECK-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
737 // CHECK-64-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
738 // CHECK-64-NEXT:    store i64 [[TMP6]], i64* [[TMP18]], align 8
739 // CHECK-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
740 // CHECK-64-NEXT:    store i8* null, i8** [[TMP19]], align 8
741 // CHECK-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
742 // CHECK-64-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
743 // CHECK-64-NEXT:    store i64 2, i64* [[TMP21]], align 8
744 // CHECK-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
745 // CHECK-64-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
746 // CHECK-64-NEXT:    store i64 2, i64* [[TMP23]], align 8
747 // CHECK-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
748 // CHECK-64-NEXT:    store i8* null, i8** [[TMP24]], align 8
749 // CHECK-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
750 // CHECK-64-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
751 // CHECK-64-NEXT:    store i64 [[TMP2]], i64* [[TMP26]], align 8
752 // CHECK-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
753 // CHECK-64-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
754 // CHECK-64-NEXT:    store i64 [[TMP2]], i64* [[TMP28]], align 8
755 // CHECK-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
756 // CHECK-64-NEXT:    store i8* null, i8** [[TMP29]], align 8
757 // CHECK-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
758 // CHECK-64-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
759 // CHECK-64-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 8
760 // CHECK-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
761 // CHECK-64-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
762 // CHECK-64-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 8
763 // CHECK-64-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
764 // CHECK-64-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 8
765 // CHECK-64-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
766 // CHECK-64-NEXT:    store i8* null, i8** [[TMP35]], align 8
767 // CHECK-64-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
768 // CHECK-64-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
769 // CHECK-64-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
770 // CHECK-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
771 // CHECK-64-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
772 // CHECK-64-NEXT:    store i32 2, i32* [[TMP39]], align 4
773 // CHECK-64-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
774 // CHECK-64-NEXT:    store i32 5, i32* [[TMP40]], align 4
775 // CHECK-64-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
776 // CHECK-64-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 8
777 // CHECK-64-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
778 // CHECK-64-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 8
779 // CHECK-64-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
780 // CHECK-64-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 8
781 // CHECK-64-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
782 // CHECK-64-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
783 // CHECK-64-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
784 // CHECK-64-NEXT:    store i8** null, i8*** [[TMP45]], align 8
785 // CHECK-64-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
786 // CHECK-64-NEXT:    store i8** null, i8*** [[TMP46]], align 8
787 // CHECK-64-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
788 // CHECK-64-NEXT:    store i64 0, i64* [[TMP47]], align 8
789 // CHECK-64-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
790 // CHECK-64-NEXT:    store i64 0, i64* [[TMP48]], align 8
791 // CHECK-64-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
792 // CHECK-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
793 // CHECK-64-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
794 // CHECK-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
795 // CHECK-64-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
796 // CHECK-64-NEXT:    store i32 0, i32* [[TMP51]], align 4
797 // CHECK-64-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
798 // CHECK-64-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
799 // CHECK-64-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
800 // CHECK-64:       omp_offload.failed:
801 // CHECK-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
802 // CHECK-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
803 // CHECK-64:       omp_offload.cont:
804 // CHECK-64-NEXT:    [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
805 // CHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
806 // CHECK-64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
807 // CHECK-64-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
808 // CHECK-64-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
809 // CHECK-64-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
810 // CHECK-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
811 // CHECK-64-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
812 // CHECK-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
813 // CHECK-64-NEXT:    ret i32 [[ADD4]]
814 // CHECK-64-LABEL: define {{[^@]+}}@_ZL7fstatici
815 // CHECK-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
816 // CHECK-64-NEXT:  entry:
817 // CHECK-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
818 // CHECK-64-NEXT:    [[A:%.*]] = alloca i32, align 4
819 // CHECK-64-NEXT:    [[AAA:%.*]] = alloca i8, align 1
820 // CHECK-64-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
821 // CHECK-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
822 // CHECK-64-NEXT:    [[AAA_CASTED:%.*]] = alloca i64, align 8
823 // CHECK-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
824 // CHECK-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
825 // CHECK-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
826 // CHECK-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
827 // CHECK-64-NEXT:    store i32 0, i32* [[A]], align 4
828 // CHECK-64-NEXT:    store i8 0, i8* [[AAA]], align 1
829 // CHECK-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
830 // CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
831 // CHECK-64-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
832 // CHECK-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
833 // CHECK-64-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
834 // CHECK-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
835 // CHECK-64-NEXT:    store i8 [[TMP2]], i8* [[CONV1]], align 1
836 // CHECK-64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
837 // CHECK-64-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
838 // CHECK-64-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
839 // CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
840 // CHECK-64-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
841 // CHECK-64-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
842 // CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[TMP7]], align 8
843 // CHECK-64-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
844 // CHECK-64-NEXT:    store i8* null, i8** [[TMP8]], align 8
845 // CHECK-64-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
846 // CHECK-64-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
847 // CHECK-64-NEXT:    store i64 [[TMP3]], i64* [[TMP10]], align 8
848 // CHECK-64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
849 // CHECK-64-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
850 // CHECK-64-NEXT:    store i64 [[TMP3]], i64* [[TMP12]], align 8
851 // CHECK-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
852 // CHECK-64-NEXT:    store i8* null, i8** [[TMP13]], align 8
853 // CHECK-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
854 // CHECK-64-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
855 // CHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
856 // CHECK-64-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
857 // CHECK-64-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
858 // CHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
859 // CHECK-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
860 // CHECK-64-NEXT:    store i8* null, i8** [[TMP18]], align 8
861 // CHECK-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
862 // CHECK-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
863 // CHECK-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
864 // CHECK-64-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
865 // CHECK-64-NEXT:    store i32 2, i32* [[TMP21]], align 4
866 // CHECK-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
867 // CHECK-64-NEXT:    store i32 3, i32* [[TMP22]], align 4
868 // CHECK-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
869 // CHECK-64-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 8
870 // CHECK-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
871 // CHECK-64-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 8
872 // CHECK-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
873 // CHECK-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
874 // CHECK-64-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
875 // CHECK-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
876 // CHECK-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
877 // CHECK-64-NEXT:    store i8** null, i8*** [[TMP27]], align 8
878 // CHECK-64-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
879 // CHECK-64-NEXT:    store i8** null, i8*** [[TMP28]], align 8
880 // CHECK-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
881 // CHECK-64-NEXT:    store i64 0, i64* [[TMP29]], align 8
882 // CHECK-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
883 // CHECK-64-NEXT:    store i64 0, i64* [[TMP30]], align 8
884 // CHECK-64-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
885 // CHECK-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
886 // CHECK-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
887 // CHECK-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
888 // CHECK-64-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
889 // CHECK-64-NEXT:    store i32 0, i32* [[TMP33]], align 4
890 // CHECK-64-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
891 // CHECK-64-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
892 // CHECK-64-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
893 // CHECK-64:       omp_offload.failed:
894 // CHECK-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
895 // CHECK-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
896 // CHECK-64:       omp_offload.cont:
897 // CHECK-64-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
898 // CHECK-64-NEXT:    ret i32 [[TMP36]]
899 // CHECK-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
900 // CHECK-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
901 // CHECK-64-NEXT:  entry:
902 // CHECK-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
903 // CHECK-64-NEXT:    [[A:%.*]] = alloca i32, align 4
904 // CHECK-64-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
905 // CHECK-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
906 // CHECK-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
907 // CHECK-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
908 // CHECK-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
909 // CHECK-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
910 // CHECK-64-NEXT:    store i32 0, i32* [[A]], align 4
911 // CHECK-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
912 // CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
913 // CHECK-64-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
914 // CHECK-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
915 // CHECK-64-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
916 // CHECK-64-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
917 // CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[TMP3]], align 8
918 // CHECK-64-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
919 // CHECK-64-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
920 // CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
921 // CHECK-64-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
922 // CHECK-64-NEXT:    store i8* null, i8** [[TMP6]], align 8
923 // CHECK-64-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
924 // CHECK-64-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
925 // CHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
926 // CHECK-64-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
927 // CHECK-64-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
928 // CHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
929 // CHECK-64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
930 // CHECK-64-NEXT:    store i8* null, i8** [[TMP11]], align 8
931 // CHECK-64-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
932 // CHECK-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
933 // CHECK-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
934 // CHECK-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
935 // CHECK-64-NEXT:    store i32 2, i32* [[TMP14]], align 4
936 // CHECK-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
937 // CHECK-64-NEXT:    store i32 2, i32* [[TMP15]], align 4
938 // CHECK-64-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
939 // CHECK-64-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 8
940 // CHECK-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
941 // CHECK-64-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 8
942 // CHECK-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
943 // CHECK-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
944 // CHECK-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
945 // CHECK-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
946 // CHECK-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
947 // CHECK-64-NEXT:    store i8** null, i8*** [[TMP20]], align 8
948 // CHECK-64-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
949 // CHECK-64-NEXT:    store i8** null, i8*** [[TMP21]], align 8
950 // CHECK-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
951 // CHECK-64-NEXT:    store i64 0, i64* [[TMP22]], align 8
952 // CHECK-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
953 // CHECK-64-NEXT:    store i64 0, i64* [[TMP23]], align 8
954 // CHECK-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
955 // CHECK-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
956 // CHECK-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
957 // CHECK-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
958 // CHECK-64-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
959 // CHECK-64-NEXT:    store i32 0, i32* [[TMP26]], align 4
960 // CHECK-64-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
961 // CHECK-64-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
962 // CHECK-64-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
963 // CHECK-64:       omp_offload.failed:
964 // CHECK-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
965 // CHECK-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
966 // CHECK-64:       omp_offload.cont:
967 // CHECK-64-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
968 // CHECK-64-NEXT:    ret i32 [[TMP29]]
969 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
970 // CHECK-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
971 // CHECK-64-NEXT:  entry:
972 // CHECK-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
973 // CHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
974 // CHECK-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
975 // CHECK-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
976 // CHECK-64-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
977 // CHECK-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
978 // CHECK-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
979 // CHECK-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
980 // CHECK-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
981 // CHECK-64-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
982 // CHECK-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
983 // CHECK-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
984 // CHECK-64-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
985 // CHECK-64-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
986 // CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
987 // CHECK-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
988 // CHECK-64-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
989 // CHECK-64-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
990 // CHECK-64-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
991 // CHECK-64-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
992 // CHECK-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
993 // CHECK-64-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
994 // CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
995 // CHECK-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
996 // CHECK-64-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
997 // CHECK-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
998 // CHECK-64-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
999 // CHECK-64-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
1000 // CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
1001 // CHECK-64-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
1002 // CHECK-64-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
1003 // CHECK-64-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
1004 // CHECK-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
1005 // CHECK-64-NEXT:    store double [[ADD]], double* [[A]], align 8
1006 // CHECK-64-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
1007 // CHECK-64-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
1008 // CHECK-64-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
1009 // CHECK-64-NEXT:    store double [[INC]], double* [[A5]], align 8
1010 // CHECK-64-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
1011 // CHECK-64-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
1012 // CHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
1013 // CHECK-64-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
1014 // CHECK-64-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
1015 // CHECK-64-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
1016 // CHECK-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
1017 // CHECK-64-NEXT:    ret void
1018 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
1019 // CHECK-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
1020 // CHECK-64-NEXT:  entry:
1021 // CHECK-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
1022 // CHECK-64-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
1023 // CHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
1024 // CHECK-64-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
1025 // CHECK-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
1026 // CHECK-64-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
1027 // CHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
1028 // CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
1029 // CHECK-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
1030 // CHECK-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
1031 // CHECK-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
1032 // CHECK-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
1033 // CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
1034 // CHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
1035 // CHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
1036 // CHECK-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
1037 // CHECK-64-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
1038 // CHECK-64-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
1039 // CHECK-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
1040 // CHECK-64-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
1041 // CHECK-64-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
1042 // CHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
1043 // CHECK-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1044 // CHECK-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
1045 // CHECK-64-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
1046 // CHECK-64-NEXT:    ret void
1047 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
1048 // CHECK-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
1049 // CHECK-64-NEXT:  entry:
1050 // CHECK-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
1051 // CHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
1052 // CHECK-64-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
1053 // CHECK-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
1054 // CHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
1055 // CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
1056 // CHECK-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
1057 // CHECK-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
1058 // CHECK-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
1059 // CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
1060 // CHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
1061 // CHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
1062 // CHECK-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
1063 // CHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
1064 // CHECK-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1065 // CHECK-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
1066 // CHECK-64-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
1067 // CHECK-64-NEXT:    ret void
1068 // CHECK-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
1069 // CHECK-64-SAME: () #[[ATTR5:[0-9]+]] {
1070 // CHECK-64-NEXT:  entry:
1071 // CHECK-64-NEXT:    call void @__tgt_register_requires(i64 1)
1072 // CHECK-64-NEXT:    ret void
1073 // CHECK-32-LABEL: define {{[^@]+}}@_Z3fooiPd
1074 // CHECK-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
1075 // CHECK-32-NEXT:  entry:
1076 // CHECK-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
1077 // CHECK-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
1078 // CHECK-32-NEXT:    [[A:%.*]] = alloca i32, align 4
1079 // CHECK-32-NEXT:    [[AA:%.*]] = alloca i16, align 2
1080 // CHECK-32-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
1081 // CHECK-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
1082 // CHECK-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
1083 // CHECK-32-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
1084 // CHECK-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
1085 // CHECK-32-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
1086 // CHECK-32-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
1087 // CHECK-32-NEXT:    [[P:%.*]] = alloca i32*, align 64
1088 // CHECK-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
1089 // CHECK-32-NEXT:    [[GA_CASTED:%.*]] = alloca i32, align 4
1090 // CHECK-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
1091 // CHECK-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
1092 // CHECK-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
1093 // CHECK-32-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
1094 // CHECK-32-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
1095 // CHECK-32-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
1096 // CHECK-32-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
1097 // CHECK-32-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
1098 // CHECK-32-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
1099 // CHECK-32-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
1100 // CHECK-32-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
1101 // CHECK-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
1102 // CHECK-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
1103 // CHECK-32-NEXT:    store i32 0, i32* [[A]], align 4
1104 // CHECK-32-NEXT:    store i16 0, i16* [[AA]], align 2
1105 // CHECK-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
1106 // CHECK-32-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave()
1107 // CHECK-32-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
1108 // CHECK-32-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
1109 // CHECK-32-NEXT:    store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
1110 // CHECK-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
1111 // CHECK-32-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
1112 // CHECK-32-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
1113 // CHECK-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
1114 // CHECK-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
1115 // CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
1116 // CHECK-32-NEXT:    store i32 [[TMP4]], i32* [[X]], align 4
1117 // CHECK-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
1118 // CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
1119 // CHECK-32-NEXT:    store i32 [[TMP5]], i32* [[Y]], align 4
1120 // CHECK-32-NEXT:    store i32* [[A]], i32** [[P]], align 64
1121 // CHECK-32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
1122 // CHECK-32-NEXT:    store i32 [[TMP6]], i32* [[A_CASTED]], align 4
1123 // CHECK-32-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
1124 // CHECK-32-NEXT:    [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
1125 // CHECK-32-NEXT:    [[TMP9:%.*]] = load i32, i32* @ga, align 4
1126 // CHECK-32-NEXT:    store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
1127 // CHECK-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
1128 // CHECK-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1129 // CHECK-32-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
1130 // CHECK-32-NEXT:    store i32 [[TMP7]], i32* [[TMP12]], align 4
1131 // CHECK-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1132 // CHECK-32-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
1133 // CHECK-32-NEXT:    store i32 [[TMP7]], i32* [[TMP14]], align 4
1134 // CHECK-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
1135 // CHECK-32-NEXT:    store i8* null, i8** [[TMP15]], align 4
1136 // CHECK-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
1137 // CHECK-32-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
1138 // CHECK-32-NEXT:    store i32* [[TMP8]], i32** [[TMP17]], align 4
1139 // CHECK-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
1140 // CHECK-32-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
1141 // CHECK-32-NEXT:    store i32* [[TMP8]], i32** [[TMP19]], align 4
1142 // CHECK-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
1143 // CHECK-32-NEXT:    store i8* null, i8** [[TMP20]], align 4
1144 // CHECK-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
1145 // CHECK-32-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
1146 // CHECK-32-NEXT:    store i32 [[TMP10]], i32* [[TMP22]], align 4
1147 // CHECK-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
1148 // CHECK-32-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
1149 // CHECK-32-NEXT:    store i32 [[TMP10]], i32* [[TMP24]], align 4
1150 // CHECK-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
1151 // CHECK-32-NEXT:    store i8* null, i8** [[TMP25]], align 4
1152 // CHECK-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1153 // CHECK-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1154 // CHECK-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
1155 // CHECK-32-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
1156 // CHECK-32-NEXT:    store i32 2, i32* [[TMP28]], align 4
1157 // CHECK-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
1158 // CHECK-32-NEXT:    store i32 3, i32* [[TMP29]], align 4
1159 // CHECK-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
1160 // CHECK-32-NEXT:    store i8** [[TMP26]], i8*** [[TMP30]], align 4
1161 // CHECK-32-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
1162 // CHECK-32-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
1163 // CHECK-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
1164 // CHECK-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
1165 // CHECK-32-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
1166 // CHECK-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
1167 // CHECK-32-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
1168 // CHECK-32-NEXT:    store i8** null, i8*** [[TMP34]], align 4
1169 // CHECK-32-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
1170 // CHECK-32-NEXT:    store i8** null, i8*** [[TMP35]], align 4
1171 // CHECK-32-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
1172 // CHECK-32-NEXT:    store i64 0, i64* [[TMP36]], align 8
1173 // CHECK-32-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
1174 // CHECK-32-NEXT:    store i64 0, i64* [[TMP37]], align 8
1175 // CHECK-32-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
1176 // CHECK-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
1177 // CHECK-32-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
1178 // CHECK-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
1179 // CHECK-32-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
1180 // CHECK-32-NEXT:    store i32 0, i32* [[TMP40]], align 4
1181 // CHECK-32-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
1182 // CHECK-32-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
1183 // CHECK-32-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
1184 // CHECK-32:       omp_offload.failed:
1185 // CHECK-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
1186 // CHECK-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
1187 // CHECK-32:       omp_offload.cont:
1188 // CHECK-32-NEXT:    [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
1189 // CHECK-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
1190 // CHECK-32-NEXT:    store i16 [[TMP43]], i16* [[CONV]], align 2
1191 // CHECK-32-NEXT:    [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
1192 // CHECK-32-NEXT:    [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
1193 // CHECK-32-NEXT:    [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
1194 // CHECK-32-NEXT:    [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
1195 // CHECK-32-NEXT:    [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
1196 // CHECK-32-NEXT:    [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
1197 // CHECK-32-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
1198 // CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
1199 // CHECK-32-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
1200 // CHECK-32-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
1201 // CHECK-32-NEXT:    store i32 [[TMP44]], i32* [[TMP52]], align 4
1202 // CHECK-32-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
1203 // CHECK-32-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
1204 // CHECK-32-NEXT:    store i32 [[TMP44]], i32* [[TMP54]], align 4
1205 // CHECK-32-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
1206 // CHECK-32-NEXT:    store i8* null, i8** [[TMP55]], align 4
1207 // CHECK-32-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
1208 // CHECK-32-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
1209 // CHECK-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
1210 // CHECK-32-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
1211 // CHECK-32-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
1212 // CHECK-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
1213 // CHECK-32-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
1214 // CHECK-32-NEXT:    store i8* null, i8** [[TMP60]], align 4
1215 // CHECK-32-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
1216 // CHECK-32-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
1217 // CHECK-32-NEXT:    store i32 [[TMP0]], i32* [[TMP62]], align 4
1218 // CHECK-32-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
1219 // CHECK-32-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
1220 // CHECK-32-NEXT:    store i32 [[TMP0]], i32* [[TMP64]], align 4
1221 // CHECK-32-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
1222 // CHECK-32-NEXT:    store i8* null, i8** [[TMP65]], align 4
1223 // CHECK-32-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
1224 // CHECK-32-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
1225 // CHECK-32-NEXT:    store float* [[VLA]], float** [[TMP67]], align 4
1226 // CHECK-32-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
1227 // CHECK-32-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
1228 // CHECK-32-NEXT:    store float* [[VLA]], float** [[TMP69]], align 4
1229 // CHECK-32-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
1230 // CHECK-32-NEXT:    store i64 [[TMP46]], i64* [[TMP70]], align 4
1231 // CHECK-32-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
1232 // CHECK-32-NEXT:    store i8* null, i8** [[TMP71]], align 4
1233 // CHECK-32-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
1234 // CHECK-32-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
1235 // CHECK-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
1236 // CHECK-32-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
1237 // CHECK-32-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
1238 // CHECK-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
1239 // CHECK-32-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
1240 // CHECK-32-NEXT:    store i8* null, i8** [[TMP76]], align 4
1241 // CHECK-32-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
1242 // CHECK-32-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
1243 // CHECK-32-NEXT:    store i32 5, i32* [[TMP78]], align 4
1244 // CHECK-32-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
1245 // CHECK-32-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
1246 // CHECK-32-NEXT:    store i32 5, i32* [[TMP80]], align 4
1247 // CHECK-32-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
1248 // CHECK-32-NEXT:    store i8* null, i8** [[TMP81]], align 4
1249 // CHECK-32-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
1250 // CHECK-32-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
1251 // CHECK-32-NEXT:    store i32 [[TMP2]], i32* [[TMP83]], align 4
1252 // CHECK-32-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
1253 // CHECK-32-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
1254 // CHECK-32-NEXT:    store i32 [[TMP2]], i32* [[TMP85]], align 4
1255 // CHECK-32-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
1256 // CHECK-32-NEXT:    store i8* null, i8** [[TMP86]], align 4
1257 // CHECK-32-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
1258 // CHECK-32-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
1259 // CHECK-32-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 4
1260 // CHECK-32-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
1261 // CHECK-32-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
1262 // CHECK-32-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 4
1263 // CHECK-32-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
1264 // CHECK-32-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 4
1265 // CHECK-32-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
1266 // CHECK-32-NEXT:    store i8* null, i8** [[TMP92]], align 4
1267 // CHECK-32-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
1268 // CHECK-32-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
1269 // CHECK-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
1270 // CHECK-32-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
1271 // CHECK-32-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
1272 // CHECK-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
1273 // CHECK-32-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
1274 // CHECK-32-NEXT:    store i8* null, i8** [[TMP97]], align 4
1275 // CHECK-32-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
1276 // CHECK-32-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
1277 // CHECK-32-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
1278 // CHECK-32-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
1279 // CHECK-32-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
1280 // CHECK-32-NEXT:    store i32 2, i32* [[TMP101]], align 4
1281 // CHECK-32-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
1282 // CHECK-32-NEXT:    store i32 9, i32* [[TMP102]], align 4
1283 // CHECK-32-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
1284 // CHECK-32-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 4
1285 // CHECK-32-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
1286 // CHECK-32-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 4
1287 // CHECK-32-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
1288 // CHECK-32-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 4
1289 // CHECK-32-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
1290 // CHECK-32-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
1291 // CHECK-32-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
1292 // CHECK-32-NEXT:    store i8** null, i8*** [[TMP107]], align 4
1293 // CHECK-32-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
1294 // CHECK-32-NEXT:    store i8** null, i8*** [[TMP108]], align 4
1295 // CHECK-32-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
1296 // CHECK-32-NEXT:    store i64 0, i64* [[TMP109]], align 8
1297 // CHECK-32-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
1298 // CHECK-32-NEXT:    store i64 0, i64* [[TMP110]], align 8
1299 // CHECK-32-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
1300 // CHECK-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
1301 // CHECK-32-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
1302 // CHECK-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
1303 // CHECK-32-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
1304 // CHECK-32-NEXT:    store i32 0, i32* [[TMP113]], align 4
1305 // CHECK-32-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
1306 // CHECK-32-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
1307 // CHECK-32-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
1308 // CHECK-32:       omp_offload.failed6:
1309 // CHECK-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
1310 // CHECK-32-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
1311 // CHECK-32:       omp_offload.cont7:
1312 // CHECK-32-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
1313 // CHECK-32-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
1314 // CHECK-32-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
1315 // CHECK-32-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 4
1316 // CHECK-32-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
1317 // CHECK-32-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
1318 // CHECK-32-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 4
1319 // CHECK-32-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
1320 // CHECK-32-NEXT:    store i8* null, i8** [[TMP121]], align 4
1321 // CHECK-32-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
1322 // CHECK-32-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
1323 // CHECK-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
1324 // CHECK-32-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
1325 // CHECK-32-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
1326 // CHECK-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
1327 // CHECK-32-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
1328 // CHECK-32-NEXT:    store i8* null, i8** [[TMP126]], align 4
1329 // CHECK-32-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
1330 // CHECK-32-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
1331 // CHECK-32-NEXT:    [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
1332 // CHECK-32-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
1333 // CHECK-32-NEXT:    store i32 2, i32* [[TMP129]], align 4
1334 // CHECK-32-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
1335 // CHECK-32-NEXT:    store i32 2, i32* [[TMP130]], align 4
1336 // CHECK-32-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
1337 // CHECK-32-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 4
1338 // CHECK-32-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
1339 // CHECK-32-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 4
1340 // CHECK-32-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
1341 // CHECK-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
1342 // CHECK-32-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
1343 // CHECK-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
1344 // CHECK-32-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
1345 // CHECK-32-NEXT:    store i8** null, i8*** [[TMP135]], align 4
1346 // CHECK-32-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
1347 // CHECK-32-NEXT:    store i8** null, i8*** [[TMP136]], align 4
1348 // CHECK-32-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
1349 // CHECK-32-NEXT:    store i64 0, i64* [[TMP137]], align 8
1350 // CHECK-32-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
1351 // CHECK-32-NEXT:    store i64 0, i64* [[TMP138]], align 8
1352 // CHECK-32-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
1353 // CHECK-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
1354 // CHECK-32-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
1355 // CHECK-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
1356 // CHECK-32-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
1357 // CHECK-32-NEXT:    store i32 0, i32* [[TMP141]], align 4
1358 // CHECK-32-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
1359 // CHECK-32-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
1360 // CHECK-32-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
1361 // CHECK-32:       omp_offload.failed12:
1362 // CHECK-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
1363 // CHECK-32-NEXT:    br label [[OMP_OFFLOAD_CONT13]]
1364 // CHECK-32:       omp_offload.cont13:
1365 // CHECK-32-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
1366 // CHECK-32-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
1367 // CHECK-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
1368 // CHECK-32-NEXT:    ret i32 [[TMP144]]
1369 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
1370 // CHECK-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
1371 // CHECK-32-NEXT:  entry:
1372 // CHECK-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
1373 // CHECK-32-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
1374 // CHECK-32-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
1375 // CHECK-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
1376 // CHECK-32-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
1377 // CHECK-32-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
1378 // CHECK-32-NEXT:    ret void
1379 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
1380 // CHECK-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
1381 // CHECK-32-NEXT:  entry:
1382 // CHECK-32-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
1383 // CHECK-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
1384 // CHECK-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
1385 // CHECK-32-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
1386 // CHECK-32-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
1387 // CHECK-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
1388 // CHECK-32-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
1389 // CHECK-32-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
1390 // CHECK-32-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
1391 // CHECK-32-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
1392 // CHECK-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
1393 // CHECK-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
1394 // CHECK-32-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
1395 // CHECK-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
1396 // CHECK-32-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
1397 // CHECK-32-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
1398 // CHECK-32-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
1399 // CHECK-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
1400 // CHECK-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
1401 // CHECK-32-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
1402 // CHECK-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
1403 // CHECK-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
1404 // CHECK-32-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
1405 // CHECK-32-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
1406 // CHECK-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
1407 // CHECK-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
1408 // CHECK-32-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
1409 // CHECK-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
1410 // CHECK-32-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
1411 // CHECK-32-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
1412 // CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
1413 // CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
1414 // CHECK-32-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
1415 // CHECK-32-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
1416 // CHECK-32-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
1417 // CHECK-32-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
1418 // CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
1419 // CHECK-32-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
1420 // CHECK-32-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
1421 // CHECK-32-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
1422 // CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
1423 // CHECK-32-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
1424 // CHECK-32-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
1425 // CHECK-32-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
1426 // CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
1427 // CHECK-32-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
1428 // CHECK-32-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
1429 // CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
1430 // CHECK-32-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
1431 // CHECK-32-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
1432 // CHECK-32-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
1433 // CHECK-32-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
1434 // CHECK-32-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
1435 // CHECK-32-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
1436 // CHECK-32-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
1437 // CHECK-32-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
1438 // CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
1439 // CHECK-32-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
1440 // CHECK-32-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
1441 // CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
1442 // CHECK-32-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
1443 // CHECK-32-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
1444 // CHECK-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
1445 // CHECK-32-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
1446 // CHECK-32-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
1447 // CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
1448 // CHECK-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
1449 // CHECK-32-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
1450 // CHECK-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
1451 // CHECK-32-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
1452 // CHECK-32-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
1453 // CHECK-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
1454 // CHECK-32-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
1455 // CHECK-32-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
1456 // CHECK-32-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
1457 // CHECK-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
1458 // CHECK-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
1459 // CHECK-32-NEXT:    store i64 1, i64* [[X]], align 4
1460 // CHECK-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
1461 // CHECK-32-NEXT:    store i8 1, i8* [[Y]], align 4
1462 // CHECK-32-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
1463 // CHECK-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
1464 // CHECK-32-NEXT:    ret void
1465 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
1466 // CHECK-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
1467 // CHECK-32-NEXT:  entry:
1468 // CHECK-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
1469 // CHECK-32-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
1470 // CHECK-32-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
1471 // CHECK-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
1472 // CHECK-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
1473 // CHECK-32-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
1474 // CHECK-32-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
1475 // CHECK-32-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
1476 // CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
1477 // CHECK-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
1478 // CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
1479 // CHECK-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
1480 // CHECK-32-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
1481 // CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
1482 // CHECK-32-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
1483 // CHECK-32-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
1484 // CHECK-32-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
1485 // CHECK-32-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
1486 // CHECK-32-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
1487 // CHECK-32-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 4
1488 // CHECK-32-NEXT:    ret void
1489 // CHECK-32-LABEL: define {{[^@]+}}@_Z3bariPd
1490 // CHECK-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
1491 // CHECK-32-NEXT:  entry:
1492 // CHECK-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
1493 // CHECK-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
1494 // CHECK-32-NEXT:    [[A:%.*]] = alloca i32, align 4
1495 // CHECK-32-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
1496 // CHECK-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
1497 // CHECK-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
1498 // CHECK-32-NEXT:    store i32 0, i32* [[A]], align 4
1499 // CHECK-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
1500 // CHECK-32-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
1501 // CHECK-32-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
1502 // CHECK-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
1503 // CHECK-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
1504 // CHECK-32-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
1505 // CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
1506 // CHECK-32-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
1507 // CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
1508 // CHECK-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
1509 // CHECK-32-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
1510 // CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
1511 // CHECK-32-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
1512 // CHECK-32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
1513 // CHECK-32-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
1514 // CHECK-32-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
1515 // CHECK-32-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
1516 // CHECK-32-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
1517 // CHECK-32-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
1518 // CHECK-32-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
1519 // CHECK-32-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
1520 // CHECK-32-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
1521 // CHECK-32-NEXT:    ret i32 [[TMP9]]
1522 // CHECK-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
1523 // CHECK-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
1524 // CHECK-32-NEXT:  entry:
1525 // CHECK-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
1526 // CHECK-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
1527 // CHECK-32-NEXT:    [[B:%.*]] = alloca i32, align 4
1528 // CHECK-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
1529 // CHECK-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
1530 // CHECK-32-NEXT:    [[B_CASTED:%.*]] = alloca i32, align 4
1531 // CHECK-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
1532 // CHECK-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
1533 // CHECK-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
1534 // CHECK-32-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
1535 // CHECK-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
1536 // CHECK-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
1537 // CHECK-32-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
1538 // CHECK-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
1539 // CHECK-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
1540 // CHECK-32-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
1541 // CHECK-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
1542 // CHECK-32-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
1543 // CHECK-32-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
1544 // CHECK-32-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
1545 // CHECK-32-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
1546 // CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
1547 // CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 4
1548 // CHECK-32-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
1549 // CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
1550 // CHECK-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
1551 // CHECK-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
1552 // CHECK-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
1553 // CHECK-32-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
1554 // CHECK-32-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
1555 // CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
1556 // CHECK-32-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1557 // CHECK-32-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
1558 // CHECK-32-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
1559 // CHECK-32-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1560 // CHECK-32-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
1561 // CHECK-32-NEXT:    store double* [[A]], double** [[TMP13]], align 4
1562 // CHECK-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
1563 // CHECK-32-NEXT:    store i8* null, i8** [[TMP14]], align 4
1564 // CHECK-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
1565 // CHECK-32-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
1566 // CHECK-32-NEXT:    store i32 [[TMP5]], i32* [[TMP16]], align 4
1567 // CHECK-32-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
1568 // CHECK-32-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
1569 // CHECK-32-NEXT:    store i32 [[TMP5]], i32* [[TMP18]], align 4
1570 // CHECK-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
1571 // CHECK-32-NEXT:    store i8* null, i8** [[TMP19]], align 4
1572 // CHECK-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
1573 // CHECK-32-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
1574 // CHECK-32-NEXT:    store i32 2, i32* [[TMP21]], align 4
1575 // CHECK-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
1576 // CHECK-32-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
1577 // CHECK-32-NEXT:    store i32 2, i32* [[TMP23]], align 4
1578 // CHECK-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
1579 // CHECK-32-NEXT:    store i8* null, i8** [[TMP24]], align 4
1580 // CHECK-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
1581 // CHECK-32-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
1582 // CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[TMP26]], align 4
1583 // CHECK-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
1584 // CHECK-32-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
1585 // CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
1586 // CHECK-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
1587 // CHECK-32-NEXT:    store i8* null, i8** [[TMP29]], align 4
1588 // CHECK-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
1589 // CHECK-32-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
1590 // CHECK-32-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 4
1591 // CHECK-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
1592 // CHECK-32-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
1593 // CHECK-32-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 4
1594 // CHECK-32-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
1595 // CHECK-32-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 4
1596 // CHECK-32-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
1597 // CHECK-32-NEXT:    store i8* null, i8** [[TMP35]], align 4
1598 // CHECK-32-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1599 // CHECK-32-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1600 // CHECK-32-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
1601 // CHECK-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
1602 // CHECK-32-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
1603 // CHECK-32-NEXT:    store i32 2, i32* [[TMP39]], align 4
1604 // CHECK-32-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
1605 // CHECK-32-NEXT:    store i32 5, i32* [[TMP40]], align 4
1606 // CHECK-32-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
1607 // CHECK-32-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 4
1608 // CHECK-32-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
1609 // CHECK-32-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 4
1610 // CHECK-32-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
1611 // CHECK-32-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 4
1612 // CHECK-32-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
1613 // CHECK-32-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
1614 // CHECK-32-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
1615 // CHECK-32-NEXT:    store i8** null, i8*** [[TMP45]], align 4
1616 // CHECK-32-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
1617 // CHECK-32-NEXT:    store i8** null, i8*** [[TMP46]], align 4
1618 // CHECK-32-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
1619 // CHECK-32-NEXT:    store i64 0, i64* [[TMP47]], align 8
1620 // CHECK-32-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
1621 // CHECK-32-NEXT:    store i64 0, i64* [[TMP48]], align 8
1622 // CHECK-32-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
1623 // CHECK-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
1624 // CHECK-32-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
1625 // CHECK-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
1626 // CHECK-32-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
1627 // CHECK-32-NEXT:    store i32 0, i32* [[TMP51]], align 4
1628 // CHECK-32-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
1629 // CHECK-32-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
1630 // CHECK-32-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
1631 // CHECK-32:       omp_offload.failed:
1632 // CHECK-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
1633 // CHECK-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
1634 // CHECK-32:       omp_offload.cont:
1635 // CHECK-32-NEXT:    [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
1636 // CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
1637 // CHECK-32-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
1638 // CHECK-32-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
1639 // CHECK-32-NEXT:    [[CONV:%.*]] = sext i16 [[TMP55]] to i32
1640 // CHECK-32-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
1641 // CHECK-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
1642 // CHECK-32-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
1643 // CHECK-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
1644 // CHECK-32-NEXT:    ret i32 [[ADD3]]
1645 // CHECK-32-LABEL: define {{[^@]+}}@_ZL7fstatici
1646 // CHECK-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
1647 // CHECK-32-NEXT:  entry:
1648 // CHECK-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
1649 // CHECK-32-NEXT:    [[A:%.*]] = alloca i32, align 4
1650 // CHECK-32-NEXT:    [[AAA:%.*]] = alloca i8, align 1
1651 // CHECK-32-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
1652 // CHECK-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
1653 // CHECK-32-NEXT:    [[AAA_CASTED:%.*]] = alloca i32, align 4
1654 // CHECK-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
1655 // CHECK-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
1656 // CHECK-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
1657 // CHECK-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
1658 // CHECK-32-NEXT:    store i32 0, i32* [[A]], align 4
1659 // CHECK-32-NEXT:    store i8 0, i8* [[AAA]], align 1
1660 // CHECK-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
1661 // CHECK-32-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
1662 // CHECK-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
1663 // CHECK-32-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
1664 // CHECK-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
1665 // CHECK-32-NEXT:    store i8 [[TMP2]], i8* [[CONV]], align 1
1666 // CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
1667 // CHECK-32-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1668 // CHECK-32-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
1669 // CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
1670 // CHECK-32-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1671 // CHECK-32-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
1672 // CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[TMP7]], align 4
1673 // CHECK-32-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
1674 // CHECK-32-NEXT:    store i8* null, i8** [[TMP8]], align 4
1675 // CHECK-32-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
1676 // CHECK-32-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
1677 // CHECK-32-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
1678 // CHECK-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
1679 // CHECK-32-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
1680 // CHECK-32-NEXT:    store i32 [[TMP3]], i32* [[TMP12]], align 4
1681 // CHECK-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
1682 // CHECK-32-NEXT:    store i8* null, i8** [[TMP13]], align 4
1683 // CHECK-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
1684 // CHECK-32-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
1685 // CHECK-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
1686 // CHECK-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
1687 // CHECK-32-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
1688 // CHECK-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
1689 // CHECK-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
1690 // CHECK-32-NEXT:    store i8* null, i8** [[TMP18]], align 4
1691 // CHECK-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1692 // CHECK-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1693 // CHECK-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
1694 // CHECK-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
1695 // CHECK-32-NEXT:    store i32 2, i32* [[TMP21]], align 4
1696 // CHECK-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
1697 // CHECK-32-NEXT:    store i32 3, i32* [[TMP22]], align 4
1698 // CHECK-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
1699 // CHECK-32-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 4
1700 // CHECK-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
1701 // CHECK-32-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 4
1702 // CHECK-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
1703 // CHECK-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
1704 // CHECK-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
1705 // CHECK-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
1706 // CHECK-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
1707 // CHECK-32-NEXT:    store i8** null, i8*** [[TMP27]], align 4
1708 // CHECK-32-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
1709 // CHECK-32-NEXT:    store i8** null, i8*** [[TMP28]], align 4
1710 // CHECK-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
1711 // CHECK-32-NEXT:    store i64 0, i64* [[TMP29]], align 8
1712 // CHECK-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
1713 // CHECK-32-NEXT:    store i64 0, i64* [[TMP30]], align 8
1714 // CHECK-32-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
1715 // CHECK-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
1716 // CHECK-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
1717 // CHECK-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
1718 // CHECK-32-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
1719 // CHECK-32-NEXT:    store i32 0, i32* [[TMP33]], align 4
1720 // CHECK-32-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
1721 // CHECK-32-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
1722 // CHECK-32-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
1723 // CHECK-32:       omp_offload.failed:
1724 // CHECK-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
1725 // CHECK-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
1726 // CHECK-32:       omp_offload.cont:
1727 // CHECK-32-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
1728 // CHECK-32-NEXT:    ret i32 [[TMP36]]
1729 // CHECK-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
1730 // CHECK-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
1731 // CHECK-32-NEXT:  entry:
1732 // CHECK-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
1733 // CHECK-32-NEXT:    [[A:%.*]] = alloca i32, align 4
1734 // CHECK-32-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
1735 // CHECK-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
1736 // CHECK-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
1737 // CHECK-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
1738 // CHECK-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
1739 // CHECK-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
1740 // CHECK-32-NEXT:    store i32 0, i32* [[A]], align 4
1741 // CHECK-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
1742 // CHECK-32-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
1743 // CHECK-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
1744 // CHECK-32-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1745 // CHECK-32-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
1746 // CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[TMP3]], align 4
1747 // CHECK-32-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1748 // CHECK-32-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
1749 // CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
1750 // CHECK-32-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
1751 // CHECK-32-NEXT:    store i8* null, i8** [[TMP6]], align 4
1752 // CHECK-32-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
1753 // CHECK-32-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
1754 // CHECK-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
1755 // CHECK-32-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
1756 // CHECK-32-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
1757 // CHECK-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
1758 // CHECK-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
1759 // CHECK-32-NEXT:    store i8* null, i8** [[TMP11]], align 4
1760 // CHECK-32-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1761 // CHECK-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1762 // CHECK-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
1763 // CHECK-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
1764 // CHECK-32-NEXT:    store i32 2, i32* [[TMP14]], align 4
1765 // CHECK-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
1766 // CHECK-32-NEXT:    store i32 2, i32* [[TMP15]], align 4
1767 // CHECK-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
1768 // CHECK-32-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 4
1769 // CHECK-32-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
1770 // CHECK-32-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 4
1771 // CHECK-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
1772 // CHECK-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
1773 // CHECK-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
1774 // CHECK-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
1775 // CHECK-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
1776 // CHECK-32-NEXT:    store i8** null, i8*** [[TMP20]], align 4
1777 // CHECK-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
1778 // CHECK-32-NEXT:    store i8** null, i8*** [[TMP21]], align 4
1779 // CHECK-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
1780 // CHECK-32-NEXT:    store i64 0, i64* [[TMP22]], align 8
1781 // CHECK-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
1782 // CHECK-32-NEXT:    store i64 0, i64* [[TMP23]], align 8
1783 // CHECK-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
1784 // CHECK-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
1785 // CHECK-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
1786 // CHECK-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
1787 // CHECK-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
1788 // CHECK-32-NEXT:    store i32 0, i32* [[TMP26]], align 4
1789 // CHECK-32-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
1790 // CHECK-32-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
1791 // CHECK-32-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
1792 // CHECK-32:       omp_offload.failed:
1793 // CHECK-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
1794 // CHECK-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
1795 // CHECK-32:       omp_offload.cont:
1796 // CHECK-32-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
1797 // CHECK-32-NEXT:    ret i32 [[TMP29]]
1798 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
1799 // CHECK-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
1800 // CHECK-32-NEXT:  entry:
1801 // CHECK-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
1802 // CHECK-32-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
1803 // CHECK-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
1804 // CHECK-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
1805 // CHECK-32-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
1806 // CHECK-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
1807 // CHECK-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
1808 // CHECK-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
1809 // CHECK-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
1810 // CHECK-32-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
1811 // CHECK-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
1812 // CHECK-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
1813 // CHECK-32-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
1814 // CHECK-32-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
1815 // CHECK-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
1816 // CHECK-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
1817 // CHECK-32-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
1818 // CHECK-32-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
1819 // CHECK-32-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
1820 // CHECK-32-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
1821 // CHECK-32-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
1822 // CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
1823 // CHECK-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
1824 // CHECK-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
1825 // CHECK-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
1826 // CHECK-32-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
1827 // CHECK-32-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
1828 // CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
1829 // CHECK-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
1830 // CHECK-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
1831 // CHECK-32-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
1832 // CHECK-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
1833 // CHECK-32-NEXT:    store double [[ADD]], double* [[A]], align 4
1834 // CHECK-32-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
1835 // CHECK-32-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
1836 // CHECK-32-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
1837 // CHECK-32-NEXT:    store double [[INC]], double* [[A4]], align 4
1838 // CHECK-32-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
1839 // CHECK-32-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
1840 // CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
1841 // CHECK-32-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
1842 // CHECK-32-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
1843 // CHECK-32-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
1844 // CHECK-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
1845 // CHECK-32-NEXT:    ret void
1846 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
1847 // CHECK-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
1848 // CHECK-32-NEXT:  entry:
1849 // CHECK-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
1850 // CHECK-32-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
1851 // CHECK-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
1852 // CHECK-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
1853 // CHECK-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
1854 // CHECK-32-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
1855 // CHECK-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
1856 // CHECK-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
1857 // CHECK-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
1858 // CHECK-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
1859 // CHECK-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
1860 // CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
1861 // CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
1862 // CHECK-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
1863 // CHECK-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
1864 // CHECK-32-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
1865 // CHECK-32-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
1866 // CHECK-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
1867 // CHECK-32-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
1868 // CHECK-32-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
1869 // CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
1870 // CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1871 // CHECK-32-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
1872 // CHECK-32-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
1873 // CHECK-32-NEXT:    ret void
1874 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
1875 // CHECK-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
1876 // CHECK-32-NEXT:  entry:
1877 // CHECK-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
1878 // CHECK-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
1879 // CHECK-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
1880 // CHECK-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
1881 // CHECK-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
1882 // CHECK-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
1883 // CHECK-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
1884 // CHECK-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
1885 // CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
1886 // CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
1887 // CHECK-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
1888 // CHECK-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
1889 // CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
1890 // CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1891 // CHECK-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
1892 // CHECK-32-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
1893 // CHECK-32-NEXT:    ret void
1894 // CHECK-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
1895 // CHECK-32-SAME: () #[[ATTR5:[0-9]+]] {
1896 // CHECK-32-NEXT:  entry:
1897 // CHECK-32-NEXT:    call void @__tgt_register_requires(i64 1)
1898 // CHECK-32-NEXT:    ret void
1899 // CHECK0-64-LABEL: define {{[^@]+}}@_Z3fooiPd
1900 // CHECK0-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
1901 // CHECK0-64-NEXT:  entry:
1902 // CHECK0-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
1903 // CHECK0-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
1904 // CHECK0-64-NEXT:    [[A:%.*]] = alloca i32, align 4
1905 // CHECK0-64-NEXT:    [[AA:%.*]] = alloca i16, align 2
1906 // CHECK0-64-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
1907 // CHECK0-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
1908 // CHECK0-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
1909 // CHECK0-64-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
1910 // CHECK0-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
1911 // CHECK0-64-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
1912 // CHECK0-64-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
1913 // CHECK0-64-NEXT:    [[P:%.*]] = alloca i32*, align 64
1914 // CHECK0-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
1915 // CHECK0-64-NEXT:    [[GA_CASTED:%.*]] = alloca i64, align 8
1916 // CHECK0-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
1917 // CHECK0-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
1918 // CHECK0-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
1919 // CHECK0-64-NEXT:    [[AA_CASTED:%.*]] = alloca i64, align 8
1920 // CHECK0-64-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8
1921 // CHECK0-64-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8
1922 // CHECK0-64-NEXT:    [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8
1923 // CHECK0-64-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
1924 // CHECK0-64-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8
1925 // CHECK0-64-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8
1926 // CHECK0-64-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8
1927 // CHECK0-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
1928 // CHECK0-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
1929 // CHECK0-64-NEXT:    store i32 0, i32* [[A]], align 4
1930 // CHECK0-64-NEXT:    store i16 0, i16* [[AA]], align 2
1931 // CHECK0-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
1932 // CHECK0-64-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1933 // CHECK0-64-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
1934 // CHECK0-64-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
1935 // CHECK0-64-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
1936 // CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
1937 // CHECK0-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
1938 // CHECK0-64-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1939 // CHECK0-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
1940 // CHECK0-64-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
1941 // CHECK0-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
1942 // CHECK0-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
1943 // CHECK0-64-NEXT:    [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
1944 // CHECK0-64-NEXT:    store i32 [[TMP6]], i32* [[X]], align 4
1945 // CHECK0-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
1946 // CHECK0-64-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
1947 // CHECK0-64-NEXT:    store i32 [[TMP7]], i32* [[Y]], align 4
1948 // CHECK0-64-NEXT:    store i32* [[A]], i32** [[P]], align 64
1949 // CHECK0-64-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
1950 // CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
1951 // CHECK0-64-NEXT:    store i32 [[TMP8]], i32* [[CONV]], align 4
1952 // CHECK0-64-NEXT:    [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8
1953 // CHECK0-64-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[P]], align 64
1954 // CHECK0-64-NEXT:    [[TMP11:%.*]] = load i32, i32* @ga, align 4
1955 // CHECK0-64-NEXT:    [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32*
1956 // CHECK0-64-NEXT:    store i32 [[TMP11]], i32* [[CONV2]], align 4
1957 // CHECK0-64-NEXT:    [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8
1958 // CHECK0-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1959 // CHECK0-64-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
1960 // CHECK0-64-NEXT:    store i64 [[TMP9]], i64* [[TMP14]], align 8
1961 // CHECK0-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1962 // CHECK0-64-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
1963 // CHECK0-64-NEXT:    store i64 [[TMP9]], i64* [[TMP16]], align 8
1964 // CHECK0-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
1965 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP17]], align 8
1966 // CHECK0-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
1967 // CHECK0-64-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
1968 // CHECK0-64-NEXT:    store i32* [[TMP10]], i32** [[TMP19]], align 8
1969 // CHECK0-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
1970 // CHECK0-64-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
1971 // CHECK0-64-NEXT:    store i32* [[TMP10]], i32** [[TMP21]], align 8
1972 // CHECK0-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
1973 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP22]], align 8
1974 // CHECK0-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
1975 // CHECK0-64-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
1976 // CHECK0-64-NEXT:    store i64 [[TMP12]], i64* [[TMP24]], align 8
1977 // CHECK0-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
1978 // CHECK0-64-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
1979 // CHECK0-64-NEXT:    store i64 [[TMP12]], i64* [[TMP26]], align 8
1980 // CHECK0-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
1981 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP27]], align 8
1982 // CHECK0-64-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1983 // CHECK0-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1984 // CHECK0-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
1985 // CHECK0-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
1986 // CHECK0-64-NEXT:    store i32 2, i32* [[TMP30]], align 4
1987 // CHECK0-64-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
1988 // CHECK0-64-NEXT:    store i32 3, i32* [[TMP31]], align 4
1989 // CHECK0-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
1990 // CHECK0-64-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
1991 // CHECK0-64-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
1992 // CHECK0-64-NEXT:    store i8** [[TMP29]], i8*** [[TMP33]], align 8
1993 // CHECK0-64-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
1994 // CHECK0-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8
1995 // CHECK0-64-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
1996 // CHECK0-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8
1997 // CHECK0-64-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
1998 // CHECK0-64-NEXT:    store i8** null, i8*** [[TMP36]], align 8
1999 // CHECK0-64-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
2000 // CHECK0-64-NEXT:    store i8** null, i8*** [[TMP37]], align 8
2001 // CHECK0-64-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
2002 // CHECK0-64-NEXT:    store i64 0, i64* [[TMP38]], align 8
2003 // CHECK0-64-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
2004 // CHECK0-64-NEXT:    store i64 0, i64* [[TMP39]], align 8
2005 // CHECK0-64-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
2006 // CHECK0-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4
2007 // CHECK0-64-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
2008 // CHECK0-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4
2009 // CHECK0-64-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
2010 // CHECK0-64-NEXT:    store i32 0, i32* [[TMP42]], align 4
2011 // CHECK0-64-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
2012 // CHECK0-64-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
2013 // CHECK0-64-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
2014 // CHECK0-64:       omp_offload.failed:
2015 // CHECK0-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
2016 // CHECK0-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
2017 // CHECK0-64:       omp_offload.cont:
2018 // CHECK0-64-NEXT:    [[TMP45:%.*]] = load i16, i16* [[AA]], align 2
2019 // CHECK0-64-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
2020 // CHECK0-64-NEXT:    store i16 [[TMP45]], i16* [[CONV3]], align 2
2021 // CHECK0-64-NEXT:    [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8
2022 // CHECK0-64-NEXT:    [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4
2023 // CHECK0-64-NEXT:    [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]]
2024 // CHECK0-64-NEXT:    [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8
2025 // CHECK0-64-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
2026 // CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false)
2027 // CHECK0-64-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
2028 // CHECK0-64-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
2029 // CHECK0-64-NEXT:    store i64 [[TMP46]], i64* [[TMP52]], align 8
2030 // CHECK0-64-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
2031 // CHECK0-64-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
2032 // CHECK0-64-NEXT:    store i64 [[TMP46]], i64* [[TMP54]], align 8
2033 // CHECK0-64-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
2034 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP55]], align 8
2035 // CHECK0-64-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
2036 // CHECK0-64-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
2037 // CHECK0-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8
2038 // CHECK0-64-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
2039 // CHECK0-64-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
2040 // CHECK0-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8
2041 // CHECK0-64-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1
2042 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP60]], align 8
2043 // CHECK0-64-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
2044 // CHECK0-64-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
2045 // CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[TMP62]], align 8
2046 // CHECK0-64-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
2047 // CHECK0-64-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
2048 // CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[TMP64]], align 8
2049 // CHECK0-64-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2
2050 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP65]], align 8
2051 // CHECK0-64-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
2052 // CHECK0-64-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
2053 // CHECK0-64-NEXT:    store float* [[VLA]], float** [[TMP67]], align 8
2054 // CHECK0-64-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
2055 // CHECK0-64-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
2056 // CHECK0-64-NEXT:    store float* [[VLA]], float** [[TMP69]], align 8
2057 // CHECK0-64-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
2058 // CHECK0-64-NEXT:    store i64 [[TMP47]], i64* [[TMP70]], align 8
2059 // CHECK0-64-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3
2060 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP71]], align 8
2061 // CHECK0-64-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4
2062 // CHECK0-64-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
2063 // CHECK0-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8
2064 // CHECK0-64-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4
2065 // CHECK0-64-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
2066 // CHECK0-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8
2067 // CHECK0-64-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4
2068 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP76]], align 8
2069 // CHECK0-64-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5
2070 // CHECK0-64-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
2071 // CHECK0-64-NEXT:    store i64 5, i64* [[TMP78]], align 8
2072 // CHECK0-64-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5
2073 // CHECK0-64-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
2074 // CHECK0-64-NEXT:    store i64 5, i64* [[TMP80]], align 8
2075 // CHECK0-64-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5
2076 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP81]], align 8
2077 // CHECK0-64-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6
2078 // CHECK0-64-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
2079 // CHECK0-64-NEXT:    store i64 [[TMP4]], i64* [[TMP83]], align 8
2080 // CHECK0-64-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6
2081 // CHECK0-64-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
2082 // CHECK0-64-NEXT:    store i64 [[TMP4]], i64* [[TMP85]], align 8
2083 // CHECK0-64-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6
2084 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP86]], align 8
2085 // CHECK0-64-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7
2086 // CHECK0-64-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
2087 // CHECK0-64-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 8
2088 // CHECK0-64-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7
2089 // CHECK0-64-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
2090 // CHECK0-64-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 8
2091 // CHECK0-64-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
2092 // CHECK0-64-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 8
2093 // CHECK0-64-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7
2094 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP92]], align 8
2095 // CHECK0-64-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8
2096 // CHECK0-64-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
2097 // CHECK0-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8
2098 // CHECK0-64-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8
2099 // CHECK0-64-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
2100 // CHECK0-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8
2101 // CHECK0-64-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8
2102 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP97]], align 8
2103 // CHECK0-64-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
2104 // CHECK0-64-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
2105 // CHECK0-64-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
2106 // CHECK0-64-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
2107 // CHECK0-64-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0
2108 // CHECK0-64-NEXT:    store i32 2, i32* [[TMP101]], align 4
2109 // CHECK0-64-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1
2110 // CHECK0-64-NEXT:    store i32 9, i32* [[TMP102]], align 4
2111 // CHECK0-64-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2
2112 // CHECK0-64-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 8
2113 // CHECK0-64-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3
2114 // CHECK0-64-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 8
2115 // CHECK0-64-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4
2116 // CHECK0-64-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 8
2117 // CHECK0-64-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5
2118 // CHECK0-64-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8
2119 // CHECK0-64-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6
2120 // CHECK0-64-NEXT:    store i8** null, i8*** [[TMP107]], align 8
2121 // CHECK0-64-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7
2122 // CHECK0-64-NEXT:    store i8** null, i8*** [[TMP108]], align 8
2123 // CHECK0-64-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8
2124 // CHECK0-64-NEXT:    store i64 0, i64* [[TMP109]], align 8
2125 // CHECK0-64-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9
2126 // CHECK0-64-NEXT:    store i64 0, i64* [[TMP110]], align 8
2127 // CHECK0-64-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10
2128 // CHECK0-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
2129 // CHECK0-64-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11
2130 // CHECK0-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
2131 // CHECK0-64-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12
2132 // CHECK0-64-NEXT:    store i32 0, i32* [[TMP113]], align 4
2133 // CHECK0-64-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]])
2134 // CHECK0-64-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
2135 // CHECK0-64-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
2136 // CHECK0-64:       omp_offload.failed8:
2137 // CHECK0-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
2138 // CHECK0-64-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
2139 // CHECK0-64:       omp_offload.cont9:
2140 // CHECK0-64-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8
2141 // CHECK0-64-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
2142 // CHECK0-64-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
2143 // CHECK0-64-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 8
2144 // CHECK0-64-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
2145 // CHECK0-64-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
2146 // CHECK0-64-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 8
2147 // CHECK0-64-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
2148 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP121]], align 8
2149 // CHECK0-64-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
2150 // CHECK0-64-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
2151 // CHECK0-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8
2152 // CHECK0-64-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
2153 // CHECK0-64-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
2154 // CHECK0-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8
2155 // CHECK0-64-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
2156 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP126]], align 8
2157 // CHECK0-64-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
2158 // CHECK0-64-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
2159 // CHECK0-64-NEXT:    [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
2160 // CHECK0-64-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0
2161 // CHECK0-64-NEXT:    store i32 2, i32* [[TMP129]], align 4
2162 // CHECK0-64-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1
2163 // CHECK0-64-NEXT:    store i32 2, i32* [[TMP130]], align 4
2164 // CHECK0-64-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2
2165 // CHECK0-64-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 8
2166 // CHECK0-64-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3
2167 // CHECK0-64-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 8
2168 // CHECK0-64-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4
2169 // CHECK0-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8
2170 // CHECK0-64-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5
2171 // CHECK0-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8
2172 // CHECK0-64-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6
2173 // CHECK0-64-NEXT:    store i8** null, i8*** [[TMP135]], align 8
2174 // CHECK0-64-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7
2175 // CHECK0-64-NEXT:    store i8** null, i8*** [[TMP136]], align 8
2176 // CHECK0-64-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8
2177 // CHECK0-64-NEXT:    store i64 0, i64* [[TMP137]], align 8
2178 // CHECK0-64-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9
2179 // CHECK0-64-NEXT:    store i64 0, i64* [[TMP138]], align 8
2180 // CHECK0-64-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
2181 // CHECK0-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
2182 // CHECK0-64-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
2183 // CHECK0-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
2184 // CHECK0-64-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
2185 // CHECK0-64-NEXT:    store i32 0, i32* [[TMP141]], align 4
2186 // CHECK0-64-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
2187 // CHECK0-64-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
2188 // CHECK0-64-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
2189 // CHECK0-64:       omp_offload.failed14:
2190 // CHECK0-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
2191 // CHECK0-64-NEXT:    br label [[OMP_OFFLOAD_CONT15]]
2192 // CHECK0-64:       omp_offload.cont15:
2193 // CHECK0-64-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
2194 // CHECK0-64-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
2195 // CHECK0-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
2196 // CHECK0-64-NEXT:    ret i32 [[TMP144]]
2197 // CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
2198 // CHECK0-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
2199 // CHECK0-64-NEXT:  entry:
2200 // CHECK0-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
2201 // CHECK0-64-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
2202 // CHECK0-64-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
2203 // CHECK0-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
2204 // CHECK0-64-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
2205 // CHECK0-64-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
2206 // CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
2207 // CHECK0-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
2208 // CHECK0-64-NEXT:    ret void
2209 // CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
2210 // CHECK0-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
2211 // CHECK0-64-NEXT:  entry:
2212 // CHECK0-64-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
2213 // CHECK0-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
2214 // CHECK0-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
2215 // CHECK0-64-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
2216 // CHECK0-64-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
2217 // CHECK0-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
2218 // CHECK0-64-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
2219 // CHECK0-64-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
2220 // CHECK0-64-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
2221 // CHECK0-64-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
2222 // CHECK0-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
2223 // CHECK0-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
2224 // CHECK0-64-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
2225 // CHECK0-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
2226 // CHECK0-64-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
2227 // CHECK0-64-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
2228 // CHECK0-64-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
2229 // CHECK0-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
2230 // CHECK0-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
2231 // CHECK0-64-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
2232 // CHECK0-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
2233 // CHECK0-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
2234 // CHECK0-64-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
2235 // CHECK0-64-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
2236 // CHECK0-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
2237 // CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
2238 // CHECK0-64-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
2239 // CHECK0-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
2240 // CHECK0-64-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
2241 // CHECK0-64-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
2242 // CHECK0-64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
2243 // CHECK0-64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
2244 // CHECK0-64-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
2245 // CHECK0-64-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
2246 // CHECK0-64-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
2247 // CHECK0-64-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
2248 // CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
2249 // CHECK0-64-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
2250 // CHECK0-64-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
2251 // CHECK0-64-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
2252 // CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
2253 // CHECK0-64-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
2254 // CHECK0-64-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
2255 // CHECK0-64-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
2256 // CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
2257 // CHECK0-64-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
2258 // CHECK0-64-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
2259 // CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
2260 // CHECK0-64-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
2261 // CHECK0-64-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
2262 // CHECK0-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
2263 // CHECK0-64-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
2264 // CHECK0-64-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
2265 // CHECK0-64-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
2266 // CHECK0-64-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
2267 // CHECK0-64-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
2268 // CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
2269 // CHECK0-64-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
2270 // CHECK0-64-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
2271 // CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
2272 // CHECK0-64-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
2273 // CHECK0-64-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
2274 // CHECK0-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
2275 // CHECK0-64-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
2276 // CHECK0-64-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
2277 // CHECK0-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
2278 // CHECK0-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
2279 // CHECK0-64-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
2280 // CHECK0-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
2281 // CHECK0-64-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
2282 // CHECK0-64-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
2283 // CHECK0-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
2284 // CHECK0-64-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
2285 // CHECK0-64-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
2286 // CHECK0-64-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
2287 // CHECK0-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
2288 // CHECK0-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
2289 // CHECK0-64-NEXT:    store i64 1, i64* [[X]], align 8
2290 // CHECK0-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
2291 // CHECK0-64-NEXT:    store i8 1, i8* [[Y]], align 8
2292 // CHECK0-64-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
2293 // CHECK0-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
2294 // CHECK0-64-NEXT:    ret void
2295 // CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
2296 // CHECK0-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
2297 // CHECK0-64-NEXT:  entry:
2298 // CHECK0-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
2299 // CHECK0-64-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
2300 // CHECK0-64-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
2301 // CHECK0-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
2302 // CHECK0-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
2303 // CHECK0-64-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
2304 // CHECK0-64-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
2305 // CHECK0-64-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
2306 // CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
2307 // CHECK0-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
2308 // CHECK0-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
2309 // CHECK0-64-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
2310 // CHECK0-64-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
2311 // CHECK0-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
2312 // CHECK0-64-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
2313 // CHECK0-64-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
2314 // CHECK0-64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
2315 // CHECK0-64-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
2316 // CHECK0-64-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
2317 // CHECK0-64-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 8
2318 // CHECK0-64-NEXT:    ret void
2319 // CHECK0-64-LABEL: define {{[^@]+}}@_Z3bariPd
2320 // CHECK0-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
2321 // CHECK0-64-NEXT:  entry:
2322 // CHECK0-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
2323 // CHECK0-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
2324 // CHECK0-64-NEXT:    [[A:%.*]] = alloca i32, align 4
2325 // CHECK0-64-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
2326 // CHECK0-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
2327 // CHECK0-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
2328 // CHECK0-64-NEXT:    store i32 0, i32* [[A]], align 4
2329 // CHECK0-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
2330 // CHECK0-64-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
2331 // CHECK0-64-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
2332 // CHECK0-64-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
2333 // CHECK0-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
2334 // CHECK0-64-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
2335 // CHECK0-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
2336 // CHECK0-64-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
2337 // CHECK0-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
2338 // CHECK0-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
2339 // CHECK0-64-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
2340 // CHECK0-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
2341 // CHECK0-64-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
2342 // CHECK0-64-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
2343 // CHECK0-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
2344 // CHECK0-64-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
2345 // CHECK0-64-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
2346 // CHECK0-64-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
2347 // CHECK0-64-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
2348 // CHECK0-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
2349 // CHECK0-64-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
2350 // CHECK0-64-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
2351 // CHECK0-64-NEXT:    ret i32 [[TMP9]]
2352 // CHECK0-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
2353 // CHECK0-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
2354 // CHECK0-64-NEXT:  entry:
2355 // CHECK0-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
2356 // CHECK0-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
2357 // CHECK0-64-NEXT:    [[B:%.*]] = alloca i32, align 4
2358 // CHECK0-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
2359 // CHECK0-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
2360 // CHECK0-64-NEXT:    [[B_CASTED:%.*]] = alloca i64, align 8
2361 // CHECK0-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
2362 // CHECK0-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
2363 // CHECK0-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
2364 // CHECK0-64-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
2365 // CHECK0-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
2366 // CHECK0-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
2367 // CHECK0-64-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
2368 // CHECK0-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
2369 // CHECK0-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
2370 // CHECK0-64-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
2371 // CHECK0-64-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
2372 // CHECK0-64-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
2373 // CHECK0-64-NEXT:    [[TMP3:%.*]] = call i8* @llvm.stacksave()
2374 // CHECK0-64-NEXT:    store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
2375 // CHECK0-64-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
2376 // CHECK0-64-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
2377 // CHECK0-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
2378 // CHECK0-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B]], align 4
2379 // CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
2380 // CHECK0-64-NEXT:    store i32 [[TMP5]], i32* [[CONV]], align 4
2381 // CHECK0-64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
2382 // CHECK0-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
2383 // CHECK0-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
2384 // CHECK0-64-NEXT:    [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
2385 // CHECK0-64-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
2386 // CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
2387 // CHECK0-64-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
2388 // CHECK0-64-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
2389 // CHECK0-64-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
2390 // CHECK0-64-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
2391 // CHECK0-64-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
2392 // CHECK0-64-NEXT:    store double* [[A]], double** [[TMP13]], align 8
2393 // CHECK0-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
2394 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP14]], align 8
2395 // CHECK0-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
2396 // CHECK0-64-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
2397 // CHECK0-64-NEXT:    store i64 [[TMP6]], i64* [[TMP16]], align 8
2398 // CHECK0-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
2399 // CHECK0-64-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
2400 // CHECK0-64-NEXT:    store i64 [[TMP6]], i64* [[TMP18]], align 8
2401 // CHECK0-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
2402 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP19]], align 8
2403 // CHECK0-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
2404 // CHECK0-64-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
2405 // CHECK0-64-NEXT:    store i64 2, i64* [[TMP21]], align 8
2406 // CHECK0-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
2407 // CHECK0-64-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
2408 // CHECK0-64-NEXT:    store i64 2, i64* [[TMP23]], align 8
2409 // CHECK0-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
2410 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP24]], align 8
2411 // CHECK0-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
2412 // CHECK0-64-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
2413 // CHECK0-64-NEXT:    store i64 [[TMP2]], i64* [[TMP26]], align 8
2414 // CHECK0-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
2415 // CHECK0-64-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
2416 // CHECK0-64-NEXT:    store i64 [[TMP2]], i64* [[TMP28]], align 8
2417 // CHECK0-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
2418 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP29]], align 8
2419 // CHECK0-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
2420 // CHECK0-64-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
2421 // CHECK0-64-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 8
2422 // CHECK0-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
2423 // CHECK0-64-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
2424 // CHECK0-64-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 8
2425 // CHECK0-64-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
2426 // CHECK0-64-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 8
2427 // CHECK0-64-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
2428 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP35]], align 8
2429 // CHECK0-64-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
2430 // CHECK0-64-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
2431 // CHECK0-64-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
2432 // CHECK0-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
2433 // CHECK0-64-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
2434 // CHECK0-64-NEXT:    store i32 2, i32* [[TMP39]], align 4
2435 // CHECK0-64-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
2436 // CHECK0-64-NEXT:    store i32 5, i32* [[TMP40]], align 4
2437 // CHECK0-64-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
2438 // CHECK0-64-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 8
2439 // CHECK0-64-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
2440 // CHECK0-64-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 8
2441 // CHECK0-64-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
2442 // CHECK0-64-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 8
2443 // CHECK0-64-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
2444 // CHECK0-64-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
2445 // CHECK0-64-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
2446 // CHECK0-64-NEXT:    store i8** null, i8*** [[TMP45]], align 8
2447 // CHECK0-64-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
2448 // CHECK0-64-NEXT:    store i8** null, i8*** [[TMP46]], align 8
2449 // CHECK0-64-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
2450 // CHECK0-64-NEXT:    store i64 0, i64* [[TMP47]], align 8
2451 // CHECK0-64-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
2452 // CHECK0-64-NEXT:    store i64 0, i64* [[TMP48]], align 8
2453 // CHECK0-64-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
2454 // CHECK0-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
2455 // CHECK0-64-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
2456 // CHECK0-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
2457 // CHECK0-64-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
2458 // CHECK0-64-NEXT:    store i32 0, i32* [[TMP51]], align 4
2459 // CHECK0-64-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
2460 // CHECK0-64-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
2461 // CHECK0-64-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
2462 // CHECK0-64:       omp_offload.failed:
2463 // CHECK0-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
2464 // CHECK0-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
2465 // CHECK0-64:       omp_offload.cont:
2466 // CHECK0-64-NEXT:    [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
2467 // CHECK0-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
2468 // CHECK0-64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
2469 // CHECK0-64-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
2470 // CHECK0-64-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
2471 // CHECK0-64-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
2472 // CHECK0-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
2473 // CHECK0-64-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
2474 // CHECK0-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
2475 // CHECK0-64-NEXT:    ret i32 [[ADD4]]
2476 // CHECK0-64-LABEL: define {{[^@]+}}@_ZL7fstatici
2477 // CHECK0-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
2478 // CHECK0-64-NEXT:  entry:
2479 // CHECK0-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
2480 // CHECK0-64-NEXT:    [[A:%.*]] = alloca i32, align 4
2481 // CHECK0-64-NEXT:    [[AAA:%.*]] = alloca i8, align 1
2482 // CHECK0-64-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
2483 // CHECK0-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
2484 // CHECK0-64-NEXT:    [[AAA_CASTED:%.*]] = alloca i64, align 8
2485 // CHECK0-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
2486 // CHECK0-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
2487 // CHECK0-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
2488 // CHECK0-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
2489 // CHECK0-64-NEXT:    store i32 0, i32* [[A]], align 4
2490 // CHECK0-64-NEXT:    store i8 0, i8* [[AAA]], align 1
2491 // CHECK0-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
2492 // CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
2493 // CHECK0-64-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
2494 // CHECK0-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
2495 // CHECK0-64-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
2496 // CHECK0-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
2497 // CHECK0-64-NEXT:    store i8 [[TMP2]], i8* [[CONV1]], align 1
2498 // CHECK0-64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
2499 // CHECK0-64-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
2500 // CHECK0-64-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
2501 // CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
2502 // CHECK0-64-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
2503 // CHECK0-64-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
2504 // CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[TMP7]], align 8
2505 // CHECK0-64-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
2506 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP8]], align 8
2507 // CHECK0-64-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
2508 // CHECK0-64-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
2509 // CHECK0-64-NEXT:    store i64 [[TMP3]], i64* [[TMP10]], align 8
2510 // CHECK0-64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
2511 // CHECK0-64-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
2512 // CHECK0-64-NEXT:    store i64 [[TMP3]], i64* [[TMP12]], align 8
2513 // CHECK0-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
2514 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP13]], align 8
2515 // CHECK0-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
2516 // CHECK0-64-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
2517 // CHECK0-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
2518 // CHECK0-64-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
2519 // CHECK0-64-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
2520 // CHECK0-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
2521 // CHECK0-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
2522 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP18]], align 8
2523 // CHECK0-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
2524 // CHECK0-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
2525 // CHECK0-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
2526 // CHECK0-64-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
2527 // CHECK0-64-NEXT:    store i32 2, i32* [[TMP21]], align 4
2528 // CHECK0-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
2529 // CHECK0-64-NEXT:    store i32 3, i32* [[TMP22]], align 4
2530 // CHECK0-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
2531 // CHECK0-64-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 8
2532 // CHECK0-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
2533 // CHECK0-64-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 8
2534 // CHECK0-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
2535 // CHECK0-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
2536 // CHECK0-64-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
2537 // CHECK0-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
2538 // CHECK0-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
2539 // CHECK0-64-NEXT:    store i8** null, i8*** [[TMP27]], align 8
2540 // CHECK0-64-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
2541 // CHECK0-64-NEXT:    store i8** null, i8*** [[TMP28]], align 8
2542 // CHECK0-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
2543 // CHECK0-64-NEXT:    store i64 0, i64* [[TMP29]], align 8
2544 // CHECK0-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
2545 // CHECK0-64-NEXT:    store i64 0, i64* [[TMP30]], align 8
2546 // CHECK0-64-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
2547 // CHECK0-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
2548 // CHECK0-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
2549 // CHECK0-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
2550 // CHECK0-64-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
2551 // CHECK0-64-NEXT:    store i32 0, i32* [[TMP33]], align 4
2552 // CHECK0-64-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
2553 // CHECK0-64-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
2554 // CHECK0-64-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
2555 // CHECK0-64:       omp_offload.failed:
2556 // CHECK0-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
2557 // CHECK0-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
2558 // CHECK0-64:       omp_offload.cont:
2559 // CHECK0-64-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
2560 // CHECK0-64-NEXT:    ret i32 [[TMP36]]
2561 // CHECK0-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
2562 // CHECK0-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
2563 // CHECK0-64-NEXT:  entry:
2564 // CHECK0-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
2565 // CHECK0-64-NEXT:    [[A:%.*]] = alloca i32, align 4
2566 // CHECK0-64-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
2567 // CHECK0-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
2568 // CHECK0-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
2569 // CHECK0-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
2570 // CHECK0-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
2571 // CHECK0-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
2572 // CHECK0-64-NEXT:    store i32 0, i32* [[A]], align 4
2573 // CHECK0-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
2574 // CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
2575 // CHECK0-64-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
2576 // CHECK0-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
2577 // CHECK0-64-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
2578 // CHECK0-64-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
2579 // CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[TMP3]], align 8
2580 // CHECK0-64-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
2581 // CHECK0-64-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
2582 // CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
2583 // CHECK0-64-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
2584 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP6]], align 8
2585 // CHECK0-64-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
2586 // CHECK0-64-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
2587 // CHECK0-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
2588 // CHECK0-64-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
2589 // CHECK0-64-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
2590 // CHECK0-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
2591 // CHECK0-64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
2592 // CHECK0-64-NEXT:    store i8* null, i8** [[TMP11]], align 8
2593 // CHECK0-64-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
2594 // CHECK0-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
2595 // CHECK0-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
2596 // CHECK0-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
2597 // CHECK0-64-NEXT:    store i32 2, i32* [[TMP14]], align 4
2598 // CHECK0-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
2599 // CHECK0-64-NEXT:    store i32 2, i32* [[TMP15]], align 4
2600 // CHECK0-64-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
2601 // CHECK0-64-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 8
2602 // CHECK0-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
2603 // CHECK0-64-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 8
2604 // CHECK0-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
2605 // CHECK0-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
2606 // CHECK0-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
2607 // CHECK0-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
2608 // CHECK0-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
2609 // CHECK0-64-NEXT:    store i8** null, i8*** [[TMP20]], align 8
2610 // CHECK0-64-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
2611 // CHECK0-64-NEXT:    store i8** null, i8*** [[TMP21]], align 8
2612 // CHECK0-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
2613 // CHECK0-64-NEXT:    store i64 0, i64* [[TMP22]], align 8
2614 // CHECK0-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
2615 // CHECK0-64-NEXT:    store i64 0, i64* [[TMP23]], align 8
2616 // CHECK0-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
2617 // CHECK0-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
2618 // CHECK0-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
2619 // CHECK0-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
2620 // CHECK0-64-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
2621 // CHECK0-64-NEXT:    store i32 0, i32* [[TMP26]], align 4
2622 // CHECK0-64-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
2623 // CHECK0-64-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
2624 // CHECK0-64-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
2625 // CHECK0-64:       omp_offload.failed:
2626 // CHECK0-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
2627 // CHECK0-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
2628 // CHECK0-64:       omp_offload.cont:
2629 // CHECK0-64-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
2630 // CHECK0-64-NEXT:    ret i32 [[TMP29]]
2631 // CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
2632 // CHECK0-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
2633 // CHECK0-64-NEXT:  entry:
2634 // CHECK0-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
2635 // CHECK0-64-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
2636 // CHECK0-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
2637 // CHECK0-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
2638 // CHECK0-64-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
2639 // CHECK0-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
2640 // CHECK0-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
2641 // CHECK0-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
2642 // CHECK0-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
2643 // CHECK0-64-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
2644 // CHECK0-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
2645 // CHECK0-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
2646 // CHECK0-64-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
2647 // CHECK0-64-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
2648 // CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
2649 // CHECK0-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
2650 // CHECK0-64-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
2651 // CHECK0-64-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
2652 // CHECK0-64-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
2653 // CHECK0-64-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
2654 // CHECK0-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
2655 // CHECK0-64-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
2656 // CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
2657 // CHECK0-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
2658 // CHECK0-64-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
2659 // CHECK0-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
2660 // CHECK0-64-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
2661 // CHECK0-64-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
2662 // CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
2663 // CHECK0-64-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
2664 // CHECK0-64-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
2665 // CHECK0-64-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
2666 // CHECK0-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
2667 // CHECK0-64-NEXT:    store double [[ADD]], double* [[A]], align 8
2668 // CHECK0-64-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
2669 // CHECK0-64-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
2670 // CHECK0-64-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
2671 // CHECK0-64-NEXT:    store double [[INC]], double* [[A5]], align 8
2672 // CHECK0-64-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
2673 // CHECK0-64-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
2674 // CHECK0-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
2675 // CHECK0-64-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
2676 // CHECK0-64-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
2677 // CHECK0-64-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
2678 // CHECK0-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
2679 // CHECK0-64-NEXT:    ret void
2680 // CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
2681 // CHECK0-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
2682 // CHECK0-64-NEXT:  entry:
2683 // CHECK0-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
2684 // CHECK0-64-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
2685 // CHECK0-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
2686 // CHECK0-64-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
2687 // CHECK0-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
2688 // CHECK0-64-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
2689 // CHECK0-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
2690 // CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
2691 // CHECK0-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
2692 // CHECK0-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
2693 // CHECK0-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
2694 // CHECK0-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
2695 // CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
2696 // CHECK0-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
2697 // CHECK0-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
2698 // CHECK0-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
2699 // CHECK0-64-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
2700 // CHECK0-64-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
2701 // CHECK0-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
2702 // CHECK0-64-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
2703 // CHECK0-64-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
2704 // CHECK0-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
2705 // CHECK0-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
2706 // CHECK0-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
2707 // CHECK0-64-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
2708 // CHECK0-64-NEXT:    ret void
2709 // CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
2710 // CHECK0-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
2711 // CHECK0-64-NEXT:  entry:
2712 // CHECK0-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
2713 // CHECK0-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
2714 // CHECK0-64-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
2715 // CHECK0-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
2716 // CHECK0-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
2717 // CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
2718 // CHECK0-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
2719 // CHECK0-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
2720 // CHECK0-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
2721 // CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
2722 // CHECK0-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
2723 // CHECK0-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
2724 // CHECK0-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
2725 // CHECK0-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
2726 // CHECK0-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
2727 // CHECK0-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
2728 // CHECK0-64-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
2729 // CHECK0-64-NEXT:    ret void
2730 // CHECK0-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
2731 // CHECK0-64-SAME: () #[[ATTR5:[0-9]+]] {
2732 // CHECK0-64-NEXT:  entry:
2733 // CHECK0-64-NEXT:    call void @__tgt_register_requires(i64 1)
2734 // CHECK0-64-NEXT:    ret void
2735 // CHECK1-64-LABEL: define {{[^@]+}}@_Z3fooiPd
2736 // CHECK1-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
2737 // CHECK1-64-NEXT:  entry:
2738 // CHECK1-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
2739 // CHECK1-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
2740 // CHECK1-64-NEXT:    [[A:%.*]] = alloca i32, align 4
2741 // CHECK1-64-NEXT:    [[AA:%.*]] = alloca i16, align 2
2742 // CHECK1-64-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
2743 // CHECK1-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
2744 // CHECK1-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
2745 // CHECK1-64-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
2746 // CHECK1-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
2747 // CHECK1-64-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
2748 // CHECK1-64-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
2749 // CHECK1-64-NEXT:    [[P:%.*]] = alloca i32*, align 64
2750 // CHECK1-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
2751 // CHECK1-64-NEXT:    [[GA_CASTED:%.*]] = alloca i64, align 8
2752 // CHECK1-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
2753 // CHECK1-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
2754 // CHECK1-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
2755 // CHECK1-64-NEXT:    [[AA_CASTED:%.*]] = alloca i64, align 8
2756 // CHECK1-64-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8
2757 // CHECK1-64-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8
2758 // CHECK1-64-NEXT:    [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8
2759 // CHECK1-64-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
2760 // CHECK1-64-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8
2761 // CHECK1-64-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8
2762 // CHECK1-64-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8
2763 // CHECK1-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
2764 // CHECK1-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
2765 // CHECK1-64-NEXT:    store i32 0, i32* [[A]], align 4
2766 // CHECK1-64-NEXT:    store i16 0, i16* [[AA]], align 2
2767 // CHECK1-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
2768 // CHECK1-64-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
2769 // CHECK1-64-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
2770 // CHECK1-64-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
2771 // CHECK1-64-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
2772 // CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
2773 // CHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
2774 // CHECK1-64-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
2775 // CHECK1-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
2776 // CHECK1-64-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
2777 // CHECK1-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
2778 // CHECK1-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
2779 // CHECK1-64-NEXT:    [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
2780 // CHECK1-64-NEXT:    store i32 [[TMP6]], i32* [[X]], align 4
2781 // CHECK1-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
2782 // CHECK1-64-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
2783 // CHECK1-64-NEXT:    store i32 [[TMP7]], i32* [[Y]], align 4
2784 // CHECK1-64-NEXT:    store i32* [[A]], i32** [[P]], align 64
2785 // CHECK1-64-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
2786 // CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
2787 // CHECK1-64-NEXT:    store i32 [[TMP8]], i32* [[CONV]], align 4
2788 // CHECK1-64-NEXT:    [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8
2789 // CHECK1-64-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[P]], align 64
2790 // CHECK1-64-NEXT:    [[TMP11:%.*]] = load i32, i32* @ga, align 4
2791 // CHECK1-64-NEXT:    [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32*
2792 // CHECK1-64-NEXT:    store i32 [[TMP11]], i32* [[CONV2]], align 4
2793 // CHECK1-64-NEXT:    [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8
2794 // CHECK1-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
2795 // CHECK1-64-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
2796 // CHECK1-64-NEXT:    store i64 [[TMP9]], i64* [[TMP14]], align 8
2797 // CHECK1-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
2798 // CHECK1-64-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
2799 // CHECK1-64-NEXT:    store i64 [[TMP9]], i64* [[TMP16]], align 8
2800 // CHECK1-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
2801 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP17]], align 8
2802 // CHECK1-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
2803 // CHECK1-64-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
2804 // CHECK1-64-NEXT:    store i32* [[TMP10]], i32** [[TMP19]], align 8
2805 // CHECK1-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
2806 // CHECK1-64-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
2807 // CHECK1-64-NEXT:    store i32* [[TMP10]], i32** [[TMP21]], align 8
2808 // CHECK1-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
2809 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP22]], align 8
2810 // CHECK1-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
2811 // CHECK1-64-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
2812 // CHECK1-64-NEXT:    store i64 [[TMP12]], i64* [[TMP24]], align 8
2813 // CHECK1-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
2814 // CHECK1-64-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
2815 // CHECK1-64-NEXT:    store i64 [[TMP12]], i64* [[TMP26]], align 8
2816 // CHECK1-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
2817 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP27]], align 8
2818 // CHECK1-64-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
2819 // CHECK1-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
2820 // CHECK1-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
2821 // CHECK1-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
2822 // CHECK1-64-NEXT:    store i32 2, i32* [[TMP30]], align 4
2823 // CHECK1-64-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
2824 // CHECK1-64-NEXT:    store i32 3, i32* [[TMP31]], align 4
2825 // CHECK1-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
2826 // CHECK1-64-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
2827 // CHECK1-64-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
2828 // CHECK1-64-NEXT:    store i8** [[TMP29]], i8*** [[TMP33]], align 8
2829 // CHECK1-64-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
2830 // CHECK1-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8
2831 // CHECK1-64-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
2832 // CHECK1-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8
2833 // CHECK1-64-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
2834 // CHECK1-64-NEXT:    store i8** null, i8*** [[TMP36]], align 8
2835 // CHECK1-64-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
2836 // CHECK1-64-NEXT:    store i8** null, i8*** [[TMP37]], align 8
2837 // CHECK1-64-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
2838 // CHECK1-64-NEXT:    store i64 0, i64* [[TMP38]], align 8
2839 // CHECK1-64-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
2840 // CHECK1-64-NEXT:    store i64 0, i64* [[TMP39]], align 8
2841 // CHECK1-64-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
2842 // CHECK1-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4
2843 // CHECK1-64-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
2844 // CHECK1-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4
2845 // CHECK1-64-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
2846 // CHECK1-64-NEXT:    store i32 0, i32* [[TMP42]], align 4
2847 // CHECK1-64-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
2848 // CHECK1-64-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
2849 // CHECK1-64-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
2850 // CHECK1-64:       omp_offload.failed:
2851 // CHECK1-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
2852 // CHECK1-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
2853 // CHECK1-64:       omp_offload.cont:
2854 // CHECK1-64-NEXT:    [[TMP45:%.*]] = load i16, i16* [[AA]], align 2
2855 // CHECK1-64-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
2856 // CHECK1-64-NEXT:    store i16 [[TMP45]], i16* [[CONV3]], align 2
2857 // CHECK1-64-NEXT:    [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8
2858 // CHECK1-64-NEXT:    [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4
2859 // CHECK1-64-NEXT:    [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]]
2860 // CHECK1-64-NEXT:    [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8
2861 // CHECK1-64-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
2862 // CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false)
2863 // CHECK1-64-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
2864 // CHECK1-64-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
2865 // CHECK1-64-NEXT:    store i64 [[TMP46]], i64* [[TMP52]], align 8
2866 // CHECK1-64-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
2867 // CHECK1-64-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
2868 // CHECK1-64-NEXT:    store i64 [[TMP46]], i64* [[TMP54]], align 8
2869 // CHECK1-64-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
2870 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP55]], align 8
2871 // CHECK1-64-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
2872 // CHECK1-64-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
2873 // CHECK1-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8
2874 // CHECK1-64-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
2875 // CHECK1-64-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
2876 // CHECK1-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8
2877 // CHECK1-64-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1
2878 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP60]], align 8
2879 // CHECK1-64-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
2880 // CHECK1-64-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
2881 // CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[TMP62]], align 8
2882 // CHECK1-64-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
2883 // CHECK1-64-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
2884 // CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[TMP64]], align 8
2885 // CHECK1-64-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2
2886 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP65]], align 8
2887 // CHECK1-64-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
2888 // CHECK1-64-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
2889 // CHECK1-64-NEXT:    store float* [[VLA]], float** [[TMP67]], align 8
2890 // CHECK1-64-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
2891 // CHECK1-64-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
2892 // CHECK1-64-NEXT:    store float* [[VLA]], float** [[TMP69]], align 8
2893 // CHECK1-64-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
2894 // CHECK1-64-NEXT:    store i64 [[TMP47]], i64* [[TMP70]], align 8
2895 // CHECK1-64-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3
2896 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP71]], align 8
2897 // CHECK1-64-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4
2898 // CHECK1-64-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
2899 // CHECK1-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8
2900 // CHECK1-64-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4
2901 // CHECK1-64-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
2902 // CHECK1-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8
2903 // CHECK1-64-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4
2904 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP76]], align 8
2905 // CHECK1-64-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5
2906 // CHECK1-64-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
2907 // CHECK1-64-NEXT:    store i64 5, i64* [[TMP78]], align 8
2908 // CHECK1-64-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5
2909 // CHECK1-64-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
2910 // CHECK1-64-NEXT:    store i64 5, i64* [[TMP80]], align 8
2911 // CHECK1-64-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5
2912 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP81]], align 8
2913 // CHECK1-64-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6
2914 // CHECK1-64-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
2915 // CHECK1-64-NEXT:    store i64 [[TMP4]], i64* [[TMP83]], align 8
2916 // CHECK1-64-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6
2917 // CHECK1-64-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
2918 // CHECK1-64-NEXT:    store i64 [[TMP4]], i64* [[TMP85]], align 8
2919 // CHECK1-64-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6
2920 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP86]], align 8
2921 // CHECK1-64-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7
2922 // CHECK1-64-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
2923 // CHECK1-64-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 8
2924 // CHECK1-64-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7
2925 // CHECK1-64-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
2926 // CHECK1-64-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 8
2927 // CHECK1-64-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
2928 // CHECK1-64-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 8
2929 // CHECK1-64-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7
2930 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP92]], align 8
2931 // CHECK1-64-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8
2932 // CHECK1-64-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
2933 // CHECK1-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8
2934 // CHECK1-64-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8
2935 // CHECK1-64-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
2936 // CHECK1-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8
2937 // CHECK1-64-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8
2938 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP97]], align 8
2939 // CHECK1-64-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
2940 // CHECK1-64-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
2941 // CHECK1-64-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
2942 // CHECK1-64-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
2943 // CHECK1-64-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0
2944 // CHECK1-64-NEXT:    store i32 2, i32* [[TMP101]], align 4
2945 // CHECK1-64-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1
2946 // CHECK1-64-NEXT:    store i32 9, i32* [[TMP102]], align 4
2947 // CHECK1-64-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2
2948 // CHECK1-64-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 8
2949 // CHECK1-64-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3
2950 // CHECK1-64-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 8
2951 // CHECK1-64-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4
2952 // CHECK1-64-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 8
2953 // CHECK1-64-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5
2954 // CHECK1-64-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8
2955 // CHECK1-64-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6
2956 // CHECK1-64-NEXT:    store i8** null, i8*** [[TMP107]], align 8
2957 // CHECK1-64-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7
2958 // CHECK1-64-NEXT:    store i8** null, i8*** [[TMP108]], align 8
2959 // CHECK1-64-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8
2960 // CHECK1-64-NEXT:    store i64 0, i64* [[TMP109]], align 8
2961 // CHECK1-64-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9
2962 // CHECK1-64-NEXT:    store i64 0, i64* [[TMP110]], align 8
2963 // CHECK1-64-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10
2964 // CHECK1-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
2965 // CHECK1-64-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11
2966 // CHECK1-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
2967 // CHECK1-64-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12
2968 // CHECK1-64-NEXT:    store i32 0, i32* [[TMP113]], align 4
2969 // CHECK1-64-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]])
2970 // CHECK1-64-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
2971 // CHECK1-64-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
2972 // CHECK1-64:       omp_offload.failed8:
2973 // CHECK1-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
2974 // CHECK1-64-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
2975 // CHECK1-64:       omp_offload.cont9:
2976 // CHECK1-64-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8
2977 // CHECK1-64-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
2978 // CHECK1-64-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
2979 // CHECK1-64-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 8
2980 // CHECK1-64-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
2981 // CHECK1-64-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
2982 // CHECK1-64-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 8
2983 // CHECK1-64-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
2984 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP121]], align 8
2985 // CHECK1-64-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
2986 // CHECK1-64-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
2987 // CHECK1-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8
2988 // CHECK1-64-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
2989 // CHECK1-64-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
2990 // CHECK1-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8
2991 // CHECK1-64-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
2992 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP126]], align 8
2993 // CHECK1-64-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
2994 // CHECK1-64-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
2995 // CHECK1-64-NEXT:    [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
2996 // CHECK1-64-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0
2997 // CHECK1-64-NEXT:    store i32 2, i32* [[TMP129]], align 4
2998 // CHECK1-64-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1
2999 // CHECK1-64-NEXT:    store i32 2, i32* [[TMP130]], align 4
3000 // CHECK1-64-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2
3001 // CHECK1-64-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 8
3002 // CHECK1-64-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3
3003 // CHECK1-64-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 8
3004 // CHECK1-64-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4
3005 // CHECK1-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8
3006 // CHECK1-64-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5
3007 // CHECK1-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8
3008 // CHECK1-64-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6
3009 // CHECK1-64-NEXT:    store i8** null, i8*** [[TMP135]], align 8
3010 // CHECK1-64-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7
3011 // CHECK1-64-NEXT:    store i8** null, i8*** [[TMP136]], align 8
3012 // CHECK1-64-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8
3013 // CHECK1-64-NEXT:    store i64 0, i64* [[TMP137]], align 8
3014 // CHECK1-64-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9
3015 // CHECK1-64-NEXT:    store i64 0, i64* [[TMP138]], align 8
3016 // CHECK1-64-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
3017 // CHECK1-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
3018 // CHECK1-64-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
3019 // CHECK1-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
3020 // CHECK1-64-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
3021 // CHECK1-64-NEXT:    store i32 0, i32* [[TMP141]], align 4
3022 // CHECK1-64-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
3023 // CHECK1-64-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
3024 // CHECK1-64-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
3025 // CHECK1-64:       omp_offload.failed14:
3026 // CHECK1-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
3027 // CHECK1-64-NEXT:    br label [[OMP_OFFLOAD_CONT15]]
3028 // CHECK1-64:       omp_offload.cont15:
3029 // CHECK1-64-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
3030 // CHECK1-64-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
3031 // CHECK1-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
3032 // CHECK1-64-NEXT:    ret i32 [[TMP144]]
3033 // CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
3034 // CHECK1-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
3035 // CHECK1-64-NEXT:  entry:
3036 // CHECK1-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
3037 // CHECK1-64-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
3038 // CHECK1-64-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
3039 // CHECK1-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
3040 // CHECK1-64-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
3041 // CHECK1-64-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
3042 // CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
3043 // CHECK1-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
3044 // CHECK1-64-NEXT:    ret void
3045 // CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
3046 // CHECK1-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
3047 // CHECK1-64-NEXT:  entry:
3048 // CHECK1-64-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
3049 // CHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
3050 // CHECK1-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
3051 // CHECK1-64-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
3052 // CHECK1-64-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
3053 // CHECK1-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
3054 // CHECK1-64-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
3055 // CHECK1-64-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
3056 // CHECK1-64-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
3057 // CHECK1-64-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
3058 // CHECK1-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
3059 // CHECK1-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
3060 // CHECK1-64-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
3061 // CHECK1-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
3062 // CHECK1-64-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
3063 // CHECK1-64-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
3064 // CHECK1-64-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
3065 // CHECK1-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
3066 // CHECK1-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
3067 // CHECK1-64-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
3068 // CHECK1-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
3069 // CHECK1-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
3070 // CHECK1-64-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
3071 // CHECK1-64-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
3072 // CHECK1-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
3073 // CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
3074 // CHECK1-64-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
3075 // CHECK1-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
3076 // CHECK1-64-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
3077 // CHECK1-64-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
3078 // CHECK1-64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
3079 // CHECK1-64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
3080 // CHECK1-64-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
3081 // CHECK1-64-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
3082 // CHECK1-64-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
3083 // CHECK1-64-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
3084 // CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
3085 // CHECK1-64-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
3086 // CHECK1-64-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
3087 // CHECK1-64-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
3088 // CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
3089 // CHECK1-64-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
3090 // CHECK1-64-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
3091 // CHECK1-64-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
3092 // CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
3093 // CHECK1-64-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
3094 // CHECK1-64-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
3095 // CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
3096 // CHECK1-64-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
3097 // CHECK1-64-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
3098 // CHECK1-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
3099 // CHECK1-64-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
3100 // CHECK1-64-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
3101 // CHECK1-64-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
3102 // CHECK1-64-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
3103 // CHECK1-64-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
3104 // CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
3105 // CHECK1-64-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
3106 // CHECK1-64-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
3107 // CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
3108 // CHECK1-64-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
3109 // CHECK1-64-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
3110 // CHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
3111 // CHECK1-64-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
3112 // CHECK1-64-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
3113 // CHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
3114 // CHECK1-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
3115 // CHECK1-64-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
3116 // CHECK1-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
3117 // CHECK1-64-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
3118 // CHECK1-64-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
3119 // CHECK1-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
3120 // CHECK1-64-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
3121 // CHECK1-64-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
3122 // CHECK1-64-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
3123 // CHECK1-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
3124 // CHECK1-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
3125 // CHECK1-64-NEXT:    store i64 1, i64* [[X]], align 8
3126 // CHECK1-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
3127 // CHECK1-64-NEXT:    store i8 1, i8* [[Y]], align 8
3128 // CHECK1-64-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
3129 // CHECK1-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
3130 // CHECK1-64-NEXT:    ret void
3131 // CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
3132 // CHECK1-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
3133 // CHECK1-64-NEXT:  entry:
3134 // CHECK1-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
3135 // CHECK1-64-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
3136 // CHECK1-64-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
3137 // CHECK1-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
3138 // CHECK1-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
3139 // CHECK1-64-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
3140 // CHECK1-64-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
3141 // CHECK1-64-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
3142 // CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
3143 // CHECK1-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
3144 // CHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
3145 // CHECK1-64-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
3146 // CHECK1-64-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
3147 // CHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
3148 // CHECK1-64-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
3149 // CHECK1-64-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
3150 // CHECK1-64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
3151 // CHECK1-64-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
3152 // CHECK1-64-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
3153 // CHECK1-64-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 8
3154 // CHECK1-64-NEXT:    ret void
3155 // CHECK1-64-LABEL: define {{[^@]+}}@_Z3bariPd
3156 // CHECK1-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
3157 // CHECK1-64-NEXT:  entry:
3158 // CHECK1-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
3159 // CHECK1-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
3160 // CHECK1-64-NEXT:    [[A:%.*]] = alloca i32, align 4
3161 // CHECK1-64-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
3162 // CHECK1-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
3163 // CHECK1-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
3164 // CHECK1-64-NEXT:    store i32 0, i32* [[A]], align 4
3165 // CHECK1-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
3166 // CHECK1-64-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
3167 // CHECK1-64-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
3168 // CHECK1-64-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
3169 // CHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
3170 // CHECK1-64-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
3171 // CHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
3172 // CHECK1-64-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
3173 // CHECK1-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
3174 // CHECK1-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
3175 // CHECK1-64-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
3176 // CHECK1-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
3177 // CHECK1-64-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
3178 // CHECK1-64-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
3179 // CHECK1-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
3180 // CHECK1-64-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
3181 // CHECK1-64-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
3182 // CHECK1-64-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
3183 // CHECK1-64-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
3184 // CHECK1-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
3185 // CHECK1-64-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
3186 // CHECK1-64-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
3187 // CHECK1-64-NEXT:    ret i32 [[TMP9]]
3188 // CHECK1-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
3189 // CHECK1-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
3190 // CHECK1-64-NEXT:  entry:
3191 // CHECK1-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
3192 // CHECK1-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
3193 // CHECK1-64-NEXT:    [[B:%.*]] = alloca i32, align 4
3194 // CHECK1-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
3195 // CHECK1-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
3196 // CHECK1-64-NEXT:    [[B_CASTED:%.*]] = alloca i64, align 8
3197 // CHECK1-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
3198 // CHECK1-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
3199 // CHECK1-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
3200 // CHECK1-64-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
3201 // CHECK1-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
3202 // CHECK1-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
3203 // CHECK1-64-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
3204 // CHECK1-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
3205 // CHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
3206 // CHECK1-64-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
3207 // CHECK1-64-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
3208 // CHECK1-64-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
3209 // CHECK1-64-NEXT:    [[TMP3:%.*]] = call i8* @llvm.stacksave()
3210 // CHECK1-64-NEXT:    store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
3211 // CHECK1-64-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
3212 // CHECK1-64-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
3213 // CHECK1-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
3214 // CHECK1-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B]], align 4
3215 // CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
3216 // CHECK1-64-NEXT:    store i32 [[TMP5]], i32* [[CONV]], align 4
3217 // CHECK1-64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
3218 // CHECK1-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
3219 // CHECK1-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
3220 // CHECK1-64-NEXT:    [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
3221 // CHECK1-64-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
3222 // CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
3223 // CHECK1-64-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
3224 // CHECK1-64-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
3225 // CHECK1-64-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
3226 // CHECK1-64-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
3227 // CHECK1-64-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
3228 // CHECK1-64-NEXT:    store double* [[A]], double** [[TMP13]], align 8
3229 // CHECK1-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
3230 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP14]], align 8
3231 // CHECK1-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
3232 // CHECK1-64-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
3233 // CHECK1-64-NEXT:    store i64 [[TMP6]], i64* [[TMP16]], align 8
3234 // CHECK1-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
3235 // CHECK1-64-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
3236 // CHECK1-64-NEXT:    store i64 [[TMP6]], i64* [[TMP18]], align 8
3237 // CHECK1-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
3238 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP19]], align 8
3239 // CHECK1-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
3240 // CHECK1-64-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
3241 // CHECK1-64-NEXT:    store i64 2, i64* [[TMP21]], align 8
3242 // CHECK1-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
3243 // CHECK1-64-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
3244 // CHECK1-64-NEXT:    store i64 2, i64* [[TMP23]], align 8
3245 // CHECK1-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
3246 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP24]], align 8
3247 // CHECK1-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
3248 // CHECK1-64-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
3249 // CHECK1-64-NEXT:    store i64 [[TMP2]], i64* [[TMP26]], align 8
3250 // CHECK1-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
3251 // CHECK1-64-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
3252 // CHECK1-64-NEXT:    store i64 [[TMP2]], i64* [[TMP28]], align 8
3253 // CHECK1-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
3254 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP29]], align 8
3255 // CHECK1-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
3256 // CHECK1-64-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
3257 // CHECK1-64-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 8
3258 // CHECK1-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
3259 // CHECK1-64-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
3260 // CHECK1-64-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 8
3261 // CHECK1-64-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
3262 // CHECK1-64-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 8
3263 // CHECK1-64-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
3264 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP35]], align 8
3265 // CHECK1-64-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
3266 // CHECK1-64-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
3267 // CHECK1-64-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
3268 // CHECK1-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
3269 // CHECK1-64-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
3270 // CHECK1-64-NEXT:    store i32 2, i32* [[TMP39]], align 4
3271 // CHECK1-64-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
3272 // CHECK1-64-NEXT:    store i32 5, i32* [[TMP40]], align 4
3273 // CHECK1-64-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
3274 // CHECK1-64-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 8
3275 // CHECK1-64-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
3276 // CHECK1-64-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 8
3277 // CHECK1-64-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
3278 // CHECK1-64-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 8
3279 // CHECK1-64-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
3280 // CHECK1-64-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
3281 // CHECK1-64-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
3282 // CHECK1-64-NEXT:    store i8** null, i8*** [[TMP45]], align 8
3283 // CHECK1-64-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
3284 // CHECK1-64-NEXT:    store i8** null, i8*** [[TMP46]], align 8
3285 // CHECK1-64-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
3286 // CHECK1-64-NEXT:    store i64 0, i64* [[TMP47]], align 8
3287 // CHECK1-64-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
3288 // CHECK1-64-NEXT:    store i64 0, i64* [[TMP48]], align 8
3289 // CHECK1-64-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
3290 // CHECK1-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
3291 // CHECK1-64-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
3292 // CHECK1-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
3293 // CHECK1-64-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
3294 // CHECK1-64-NEXT:    store i32 0, i32* [[TMP51]], align 4
3295 // CHECK1-64-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
3296 // CHECK1-64-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
3297 // CHECK1-64-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
3298 // CHECK1-64:       omp_offload.failed:
3299 // CHECK1-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
3300 // CHECK1-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
3301 // CHECK1-64:       omp_offload.cont:
3302 // CHECK1-64-NEXT:    [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
3303 // CHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
3304 // CHECK1-64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
3305 // CHECK1-64-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
3306 // CHECK1-64-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
3307 // CHECK1-64-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
3308 // CHECK1-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
3309 // CHECK1-64-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
3310 // CHECK1-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
3311 // CHECK1-64-NEXT:    ret i32 [[ADD4]]
3312 // CHECK1-64-LABEL: define {{[^@]+}}@_ZL7fstatici
3313 // CHECK1-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
3314 // CHECK1-64-NEXT:  entry:
3315 // CHECK1-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
3316 // CHECK1-64-NEXT:    [[A:%.*]] = alloca i32, align 4
3317 // CHECK1-64-NEXT:    [[AAA:%.*]] = alloca i8, align 1
3318 // CHECK1-64-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
3319 // CHECK1-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
3320 // CHECK1-64-NEXT:    [[AAA_CASTED:%.*]] = alloca i64, align 8
3321 // CHECK1-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
3322 // CHECK1-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
3323 // CHECK1-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
3324 // CHECK1-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
3325 // CHECK1-64-NEXT:    store i32 0, i32* [[A]], align 4
3326 // CHECK1-64-NEXT:    store i8 0, i8* [[AAA]], align 1
3327 // CHECK1-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
3328 // CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
3329 // CHECK1-64-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
3330 // CHECK1-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
3331 // CHECK1-64-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
3332 // CHECK1-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
3333 // CHECK1-64-NEXT:    store i8 [[TMP2]], i8* [[CONV1]], align 1
3334 // CHECK1-64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
3335 // CHECK1-64-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
3336 // CHECK1-64-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
3337 // CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
3338 // CHECK1-64-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
3339 // CHECK1-64-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
3340 // CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[TMP7]], align 8
3341 // CHECK1-64-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
3342 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP8]], align 8
3343 // CHECK1-64-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
3344 // CHECK1-64-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
3345 // CHECK1-64-NEXT:    store i64 [[TMP3]], i64* [[TMP10]], align 8
3346 // CHECK1-64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
3347 // CHECK1-64-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
3348 // CHECK1-64-NEXT:    store i64 [[TMP3]], i64* [[TMP12]], align 8
3349 // CHECK1-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
3350 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP13]], align 8
3351 // CHECK1-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
3352 // CHECK1-64-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
3353 // CHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
3354 // CHECK1-64-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
3355 // CHECK1-64-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
3356 // CHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
3357 // CHECK1-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
3358 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP18]], align 8
3359 // CHECK1-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
3360 // CHECK1-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
3361 // CHECK1-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
3362 // CHECK1-64-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
3363 // CHECK1-64-NEXT:    store i32 2, i32* [[TMP21]], align 4
3364 // CHECK1-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
3365 // CHECK1-64-NEXT:    store i32 3, i32* [[TMP22]], align 4
3366 // CHECK1-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
3367 // CHECK1-64-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 8
3368 // CHECK1-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
3369 // CHECK1-64-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 8
3370 // CHECK1-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
3371 // CHECK1-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
3372 // CHECK1-64-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
3373 // CHECK1-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
3374 // CHECK1-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
3375 // CHECK1-64-NEXT:    store i8** null, i8*** [[TMP27]], align 8
3376 // CHECK1-64-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
3377 // CHECK1-64-NEXT:    store i8** null, i8*** [[TMP28]], align 8
3378 // CHECK1-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
3379 // CHECK1-64-NEXT:    store i64 0, i64* [[TMP29]], align 8
3380 // CHECK1-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
3381 // CHECK1-64-NEXT:    store i64 0, i64* [[TMP30]], align 8
3382 // CHECK1-64-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
3383 // CHECK1-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
3384 // CHECK1-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
3385 // CHECK1-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
3386 // CHECK1-64-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
3387 // CHECK1-64-NEXT:    store i32 0, i32* [[TMP33]], align 4
3388 // CHECK1-64-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
3389 // CHECK1-64-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
3390 // CHECK1-64-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
3391 // CHECK1-64:       omp_offload.failed:
3392 // CHECK1-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
3393 // CHECK1-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
3394 // CHECK1-64:       omp_offload.cont:
3395 // CHECK1-64-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
3396 // CHECK1-64-NEXT:    ret i32 [[TMP36]]
3397 // CHECK1-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
3398 // CHECK1-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
3399 // CHECK1-64-NEXT:  entry:
3400 // CHECK1-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
3401 // CHECK1-64-NEXT:    [[A:%.*]] = alloca i32, align 4
3402 // CHECK1-64-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
3403 // CHECK1-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
3404 // CHECK1-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
3405 // CHECK1-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
3406 // CHECK1-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
3407 // CHECK1-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
3408 // CHECK1-64-NEXT:    store i32 0, i32* [[A]], align 4
3409 // CHECK1-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
3410 // CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
3411 // CHECK1-64-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
3412 // CHECK1-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
3413 // CHECK1-64-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
3414 // CHECK1-64-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
3415 // CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[TMP3]], align 8
3416 // CHECK1-64-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
3417 // CHECK1-64-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
3418 // CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
3419 // CHECK1-64-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
3420 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP6]], align 8
3421 // CHECK1-64-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
3422 // CHECK1-64-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
3423 // CHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
3424 // CHECK1-64-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
3425 // CHECK1-64-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
3426 // CHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
3427 // CHECK1-64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
3428 // CHECK1-64-NEXT:    store i8* null, i8** [[TMP11]], align 8
3429 // CHECK1-64-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
3430 // CHECK1-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
3431 // CHECK1-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
3432 // CHECK1-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
3433 // CHECK1-64-NEXT:    store i32 2, i32* [[TMP14]], align 4
3434 // CHECK1-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
3435 // CHECK1-64-NEXT:    store i32 2, i32* [[TMP15]], align 4
3436 // CHECK1-64-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
3437 // CHECK1-64-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 8
3438 // CHECK1-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
3439 // CHECK1-64-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 8
3440 // CHECK1-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
3441 // CHECK1-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
3442 // CHECK1-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
3443 // CHECK1-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
3444 // CHECK1-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
3445 // CHECK1-64-NEXT:    store i8** null, i8*** [[TMP20]], align 8
3446 // CHECK1-64-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
3447 // CHECK1-64-NEXT:    store i8** null, i8*** [[TMP21]], align 8
3448 // CHECK1-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
3449 // CHECK1-64-NEXT:    store i64 0, i64* [[TMP22]], align 8
3450 // CHECK1-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
3451 // CHECK1-64-NEXT:    store i64 0, i64* [[TMP23]], align 8
3452 // CHECK1-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
3453 // CHECK1-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
3454 // CHECK1-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
3455 // CHECK1-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
3456 // CHECK1-64-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
3457 // CHECK1-64-NEXT:    store i32 0, i32* [[TMP26]], align 4
3458 // CHECK1-64-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
3459 // CHECK1-64-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
3460 // CHECK1-64-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
3461 // CHECK1-64:       omp_offload.failed:
3462 // CHECK1-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
3463 // CHECK1-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
3464 // CHECK1-64:       omp_offload.cont:
3465 // CHECK1-64-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
3466 // CHECK1-64-NEXT:    ret i32 [[TMP29]]
3467 // CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
3468 // CHECK1-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
3469 // CHECK1-64-NEXT:  entry:
3470 // CHECK1-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
3471 // CHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
3472 // CHECK1-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
3473 // CHECK1-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
3474 // CHECK1-64-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
3475 // CHECK1-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
3476 // CHECK1-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
3477 // CHECK1-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
3478 // CHECK1-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
3479 // CHECK1-64-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
3480 // CHECK1-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
3481 // CHECK1-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
3482 // CHECK1-64-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
3483 // CHECK1-64-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
3484 // CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
3485 // CHECK1-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
3486 // CHECK1-64-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
3487 // CHECK1-64-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
3488 // CHECK1-64-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
3489 // CHECK1-64-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
3490 // CHECK1-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
3491 // CHECK1-64-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
3492 // CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
3493 // CHECK1-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
3494 // CHECK1-64-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
3495 // CHECK1-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
3496 // CHECK1-64-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
3497 // CHECK1-64-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
3498 // CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
3499 // CHECK1-64-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
3500 // CHECK1-64-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
3501 // CHECK1-64-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
3502 // CHECK1-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
3503 // CHECK1-64-NEXT:    store double [[ADD]], double* [[A]], align 8
3504 // CHECK1-64-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
3505 // CHECK1-64-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
3506 // CHECK1-64-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
3507 // CHECK1-64-NEXT:    store double [[INC]], double* [[A5]], align 8
3508 // CHECK1-64-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
3509 // CHECK1-64-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
3510 // CHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
3511 // CHECK1-64-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
3512 // CHECK1-64-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
3513 // CHECK1-64-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
3514 // CHECK1-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
3515 // CHECK1-64-NEXT:    ret void
3516 // CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
3517 // CHECK1-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
3518 // CHECK1-64-NEXT:  entry:
3519 // CHECK1-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
3520 // CHECK1-64-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
3521 // CHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
3522 // CHECK1-64-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
3523 // CHECK1-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
3524 // CHECK1-64-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
3525 // CHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
3526 // CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
3527 // CHECK1-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
3528 // CHECK1-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
3529 // CHECK1-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
3530 // CHECK1-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
3531 // CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
3532 // CHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
3533 // CHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
3534 // CHECK1-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
3535 // CHECK1-64-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
3536 // CHECK1-64-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
3537 // CHECK1-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
3538 // CHECK1-64-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
3539 // CHECK1-64-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
3540 // CHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
3541 // CHECK1-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
3542 // CHECK1-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
3543 // CHECK1-64-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
3544 // CHECK1-64-NEXT:    ret void
3545 // CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
3546 // CHECK1-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
3547 // CHECK1-64-NEXT:  entry:
3548 // CHECK1-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
3549 // CHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
3550 // CHECK1-64-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
3551 // CHECK1-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
3552 // CHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
3553 // CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
3554 // CHECK1-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
3555 // CHECK1-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
3556 // CHECK1-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
3557 // CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
3558 // CHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
3559 // CHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
3560 // CHECK1-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
3561 // CHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
3562 // CHECK1-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
3563 // CHECK1-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
3564 // CHECK1-64-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
3565 // CHECK1-64-NEXT:    ret void
3566 // CHECK1-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
3567 // CHECK1-64-SAME: () #[[ATTR5:[0-9]+]] {
3568 // CHECK1-64-NEXT:  entry:
3569 // CHECK1-64-NEXT:    call void @__tgt_register_requires(i64 1)
3570 // CHECK1-64-NEXT:    ret void
3571 // CHECK2-32-LABEL: define {{[^@]+}}@_Z3fooiPd
3572 // CHECK2-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
3573 // CHECK2-32-NEXT:  entry:
3574 // CHECK2-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
3575 // CHECK2-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
3576 // CHECK2-32-NEXT:    [[A:%.*]] = alloca i32, align 4
3577 // CHECK2-32-NEXT:    [[AA:%.*]] = alloca i16, align 2
3578 // CHECK2-32-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
3579 // CHECK2-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
3580 // CHECK2-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
3581 // CHECK2-32-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
3582 // CHECK2-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
3583 // CHECK2-32-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
3584 // CHECK2-32-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
3585 // CHECK2-32-NEXT:    [[P:%.*]] = alloca i32*, align 64
3586 // CHECK2-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
3587 // CHECK2-32-NEXT:    [[GA_CASTED:%.*]] = alloca i32, align 4
3588 // CHECK2-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
3589 // CHECK2-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
3590 // CHECK2-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
3591 // CHECK2-32-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
3592 // CHECK2-32-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
3593 // CHECK2-32-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
3594 // CHECK2-32-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
3595 // CHECK2-32-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
3596 // CHECK2-32-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
3597 // CHECK2-32-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
3598 // CHECK2-32-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
3599 // CHECK2-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
3600 // CHECK2-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
3601 // CHECK2-32-NEXT:    store i32 0, i32* [[A]], align 4
3602 // CHECK2-32-NEXT:    store i16 0, i16* [[AA]], align 2
3603 // CHECK2-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
3604 // CHECK2-32-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave()
3605 // CHECK2-32-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
3606 // CHECK2-32-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
3607 // CHECK2-32-NEXT:    store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
3608 // CHECK2-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
3609 // CHECK2-32-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
3610 // CHECK2-32-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
3611 // CHECK2-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
3612 // CHECK2-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
3613 // CHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
3614 // CHECK2-32-NEXT:    store i32 [[TMP4]], i32* [[X]], align 4
3615 // CHECK2-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
3616 // CHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
3617 // CHECK2-32-NEXT:    store i32 [[TMP5]], i32* [[Y]], align 4
3618 // CHECK2-32-NEXT:    store i32* [[A]], i32** [[P]], align 64
3619 // CHECK2-32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
3620 // CHECK2-32-NEXT:    store i32 [[TMP6]], i32* [[A_CASTED]], align 4
3621 // CHECK2-32-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
3622 // CHECK2-32-NEXT:    [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
3623 // CHECK2-32-NEXT:    [[TMP9:%.*]] = load i32, i32* @ga, align 4
3624 // CHECK2-32-NEXT:    store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
3625 // CHECK2-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
3626 // CHECK2-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
3627 // CHECK2-32-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
3628 // CHECK2-32-NEXT:    store i32 [[TMP7]], i32* [[TMP12]], align 4
3629 // CHECK2-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
3630 // CHECK2-32-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
3631 // CHECK2-32-NEXT:    store i32 [[TMP7]], i32* [[TMP14]], align 4
3632 // CHECK2-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
3633 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP15]], align 4
3634 // CHECK2-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
3635 // CHECK2-32-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
3636 // CHECK2-32-NEXT:    store i32* [[TMP8]], i32** [[TMP17]], align 4
3637 // CHECK2-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
3638 // CHECK2-32-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
3639 // CHECK2-32-NEXT:    store i32* [[TMP8]], i32** [[TMP19]], align 4
3640 // CHECK2-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
3641 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP20]], align 4
3642 // CHECK2-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
3643 // CHECK2-32-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
3644 // CHECK2-32-NEXT:    store i32 [[TMP10]], i32* [[TMP22]], align 4
3645 // CHECK2-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
3646 // CHECK2-32-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
3647 // CHECK2-32-NEXT:    store i32 [[TMP10]], i32* [[TMP24]], align 4
3648 // CHECK2-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
3649 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP25]], align 4
3650 // CHECK2-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
3651 // CHECK2-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
3652 // CHECK2-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
3653 // CHECK2-32-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
3654 // CHECK2-32-NEXT:    store i32 2, i32* [[TMP28]], align 4
3655 // CHECK2-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
3656 // CHECK2-32-NEXT:    store i32 3, i32* [[TMP29]], align 4
3657 // CHECK2-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
3658 // CHECK2-32-NEXT:    store i8** [[TMP26]], i8*** [[TMP30]], align 4
3659 // CHECK2-32-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
3660 // CHECK2-32-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
3661 // CHECK2-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
3662 // CHECK2-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
3663 // CHECK2-32-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
3664 // CHECK2-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
3665 // CHECK2-32-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
3666 // CHECK2-32-NEXT:    store i8** null, i8*** [[TMP34]], align 4
3667 // CHECK2-32-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
3668 // CHECK2-32-NEXT:    store i8** null, i8*** [[TMP35]], align 4
3669 // CHECK2-32-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
3670 // CHECK2-32-NEXT:    store i64 0, i64* [[TMP36]], align 8
3671 // CHECK2-32-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
3672 // CHECK2-32-NEXT:    store i64 0, i64* [[TMP37]], align 8
3673 // CHECK2-32-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
3674 // CHECK2-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
3675 // CHECK2-32-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
3676 // CHECK2-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
3677 // CHECK2-32-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
3678 // CHECK2-32-NEXT:    store i32 0, i32* [[TMP40]], align 4
3679 // CHECK2-32-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
3680 // CHECK2-32-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
3681 // CHECK2-32-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
3682 // CHECK2-32:       omp_offload.failed:
3683 // CHECK2-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
3684 // CHECK2-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
3685 // CHECK2-32:       omp_offload.cont:
3686 // CHECK2-32-NEXT:    [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
3687 // CHECK2-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
3688 // CHECK2-32-NEXT:    store i16 [[TMP43]], i16* [[CONV]], align 2
3689 // CHECK2-32-NEXT:    [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
3690 // CHECK2-32-NEXT:    [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
3691 // CHECK2-32-NEXT:    [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
3692 // CHECK2-32-NEXT:    [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
3693 // CHECK2-32-NEXT:    [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
3694 // CHECK2-32-NEXT:    [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
3695 // CHECK2-32-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
3696 // CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
3697 // CHECK2-32-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
3698 // CHECK2-32-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
3699 // CHECK2-32-NEXT:    store i32 [[TMP44]], i32* [[TMP52]], align 4
3700 // CHECK2-32-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
3701 // CHECK2-32-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
3702 // CHECK2-32-NEXT:    store i32 [[TMP44]], i32* [[TMP54]], align 4
3703 // CHECK2-32-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
3704 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP55]], align 4
3705 // CHECK2-32-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
3706 // CHECK2-32-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
3707 // CHECK2-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
3708 // CHECK2-32-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
3709 // CHECK2-32-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
3710 // CHECK2-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
3711 // CHECK2-32-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
3712 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP60]], align 4
3713 // CHECK2-32-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
3714 // CHECK2-32-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
3715 // CHECK2-32-NEXT:    store i32 [[TMP0]], i32* [[TMP62]], align 4
3716 // CHECK2-32-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
3717 // CHECK2-32-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
3718 // CHECK2-32-NEXT:    store i32 [[TMP0]], i32* [[TMP64]], align 4
3719 // CHECK2-32-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
3720 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP65]], align 4
3721 // CHECK2-32-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
3722 // CHECK2-32-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
3723 // CHECK2-32-NEXT:    store float* [[VLA]], float** [[TMP67]], align 4
3724 // CHECK2-32-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
3725 // CHECK2-32-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
3726 // CHECK2-32-NEXT:    store float* [[VLA]], float** [[TMP69]], align 4
3727 // CHECK2-32-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
3728 // CHECK2-32-NEXT:    store i64 [[TMP46]], i64* [[TMP70]], align 4
3729 // CHECK2-32-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
3730 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP71]], align 4
3731 // CHECK2-32-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
3732 // CHECK2-32-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
3733 // CHECK2-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
3734 // CHECK2-32-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
3735 // CHECK2-32-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
3736 // CHECK2-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
3737 // CHECK2-32-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
3738 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP76]], align 4
3739 // CHECK2-32-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
3740 // CHECK2-32-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
3741 // CHECK2-32-NEXT:    store i32 5, i32* [[TMP78]], align 4
3742 // CHECK2-32-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
3743 // CHECK2-32-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
3744 // CHECK2-32-NEXT:    store i32 5, i32* [[TMP80]], align 4
3745 // CHECK2-32-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
3746 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP81]], align 4
3747 // CHECK2-32-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
3748 // CHECK2-32-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
3749 // CHECK2-32-NEXT:    store i32 [[TMP2]], i32* [[TMP83]], align 4
3750 // CHECK2-32-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
3751 // CHECK2-32-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
3752 // CHECK2-32-NEXT:    store i32 [[TMP2]], i32* [[TMP85]], align 4
3753 // CHECK2-32-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
3754 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP86]], align 4
3755 // CHECK2-32-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
3756 // CHECK2-32-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
3757 // CHECK2-32-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 4
3758 // CHECK2-32-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
3759 // CHECK2-32-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
3760 // CHECK2-32-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 4
3761 // CHECK2-32-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
3762 // CHECK2-32-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 4
3763 // CHECK2-32-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
3764 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP92]], align 4
3765 // CHECK2-32-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
3766 // CHECK2-32-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
3767 // CHECK2-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
3768 // CHECK2-32-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
3769 // CHECK2-32-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
3770 // CHECK2-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
3771 // CHECK2-32-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
3772 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP97]], align 4
3773 // CHECK2-32-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
3774 // CHECK2-32-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
3775 // CHECK2-32-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
3776 // CHECK2-32-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
3777 // CHECK2-32-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
3778 // CHECK2-32-NEXT:    store i32 2, i32* [[TMP101]], align 4
3779 // CHECK2-32-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
3780 // CHECK2-32-NEXT:    store i32 9, i32* [[TMP102]], align 4
3781 // CHECK2-32-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
3782 // CHECK2-32-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 4
3783 // CHECK2-32-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
3784 // CHECK2-32-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 4
3785 // CHECK2-32-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
3786 // CHECK2-32-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 4
3787 // CHECK2-32-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
3788 // CHECK2-32-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
3789 // CHECK2-32-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
3790 // CHECK2-32-NEXT:    store i8** null, i8*** [[TMP107]], align 4
3791 // CHECK2-32-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
3792 // CHECK2-32-NEXT:    store i8** null, i8*** [[TMP108]], align 4
3793 // CHECK2-32-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
3794 // CHECK2-32-NEXT:    store i64 0, i64* [[TMP109]], align 8
3795 // CHECK2-32-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
3796 // CHECK2-32-NEXT:    store i64 0, i64* [[TMP110]], align 8
3797 // CHECK2-32-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
3798 // CHECK2-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
3799 // CHECK2-32-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
3800 // CHECK2-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
3801 // CHECK2-32-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
3802 // CHECK2-32-NEXT:    store i32 0, i32* [[TMP113]], align 4
3803 // CHECK2-32-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
3804 // CHECK2-32-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
3805 // CHECK2-32-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
3806 // CHECK2-32:       omp_offload.failed6:
3807 // CHECK2-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
3808 // CHECK2-32-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
3809 // CHECK2-32:       omp_offload.cont7:
3810 // CHECK2-32-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
3811 // CHECK2-32-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
3812 // CHECK2-32-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
3813 // CHECK2-32-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 4
3814 // CHECK2-32-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
3815 // CHECK2-32-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
3816 // CHECK2-32-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 4
3817 // CHECK2-32-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
3818 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP121]], align 4
3819 // CHECK2-32-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
3820 // CHECK2-32-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
3821 // CHECK2-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
3822 // CHECK2-32-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
3823 // CHECK2-32-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
3824 // CHECK2-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
3825 // CHECK2-32-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
3826 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP126]], align 4
3827 // CHECK2-32-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
3828 // CHECK2-32-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
3829 // CHECK2-32-NEXT:    [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
3830 // CHECK2-32-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
3831 // CHECK2-32-NEXT:    store i32 2, i32* [[TMP129]], align 4
3832 // CHECK2-32-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
3833 // CHECK2-32-NEXT:    store i32 2, i32* [[TMP130]], align 4
3834 // CHECK2-32-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
3835 // CHECK2-32-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 4
3836 // CHECK2-32-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
3837 // CHECK2-32-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 4
3838 // CHECK2-32-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
3839 // CHECK2-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
3840 // CHECK2-32-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
3841 // CHECK2-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
3842 // CHECK2-32-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
3843 // CHECK2-32-NEXT:    store i8** null, i8*** [[TMP135]], align 4
3844 // CHECK2-32-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
3845 // CHECK2-32-NEXT:    store i8** null, i8*** [[TMP136]], align 4
3846 // CHECK2-32-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
3847 // CHECK2-32-NEXT:    store i64 0, i64* [[TMP137]], align 8
3848 // CHECK2-32-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
3849 // CHECK2-32-NEXT:    store i64 0, i64* [[TMP138]], align 8
3850 // CHECK2-32-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
3851 // CHECK2-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
3852 // CHECK2-32-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
3853 // CHECK2-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
3854 // CHECK2-32-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
3855 // CHECK2-32-NEXT:    store i32 0, i32* [[TMP141]], align 4
3856 // CHECK2-32-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
3857 // CHECK2-32-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
3858 // CHECK2-32-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
3859 // CHECK2-32:       omp_offload.failed12:
3860 // CHECK2-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
3861 // CHECK2-32-NEXT:    br label [[OMP_OFFLOAD_CONT13]]
3862 // CHECK2-32:       omp_offload.cont13:
3863 // CHECK2-32-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
3864 // CHECK2-32-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
3865 // CHECK2-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
3866 // CHECK2-32-NEXT:    ret i32 [[TMP144]]
3867 // CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
3868 // CHECK2-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
3869 // CHECK2-32-NEXT:  entry:
3870 // CHECK2-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
3871 // CHECK2-32-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
3872 // CHECK2-32-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
3873 // CHECK2-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
3874 // CHECK2-32-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
3875 // CHECK2-32-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
3876 // CHECK2-32-NEXT:    ret void
3877 // CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
3878 // CHECK2-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
3879 // CHECK2-32-NEXT:  entry:
3880 // CHECK2-32-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
3881 // CHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
3882 // CHECK2-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
3883 // CHECK2-32-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
3884 // CHECK2-32-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
3885 // CHECK2-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
3886 // CHECK2-32-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
3887 // CHECK2-32-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
3888 // CHECK2-32-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
3889 // CHECK2-32-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
3890 // CHECK2-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
3891 // CHECK2-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
3892 // CHECK2-32-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
3893 // CHECK2-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
3894 // CHECK2-32-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
3895 // CHECK2-32-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
3896 // CHECK2-32-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
3897 // CHECK2-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
3898 // CHECK2-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
3899 // CHECK2-32-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
3900 // CHECK2-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
3901 // CHECK2-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
3902 // CHECK2-32-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
3903 // CHECK2-32-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
3904 // CHECK2-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
3905 // CHECK2-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
3906 // CHECK2-32-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
3907 // CHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
3908 // CHECK2-32-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
3909 // CHECK2-32-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
3910 // CHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
3911 // CHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
3912 // CHECK2-32-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
3913 // CHECK2-32-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
3914 // CHECK2-32-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
3915 // CHECK2-32-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
3916 // CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
3917 // CHECK2-32-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
3918 // CHECK2-32-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
3919 // CHECK2-32-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
3920 // CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
3921 // CHECK2-32-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
3922 // CHECK2-32-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
3923 // CHECK2-32-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
3924 // CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
3925 // CHECK2-32-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
3926 // CHECK2-32-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
3927 // CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
3928 // CHECK2-32-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
3929 // CHECK2-32-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
3930 // CHECK2-32-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
3931 // CHECK2-32-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
3932 // CHECK2-32-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
3933 // CHECK2-32-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
3934 // CHECK2-32-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
3935 // CHECK2-32-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
3936 // CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
3937 // CHECK2-32-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
3938 // CHECK2-32-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
3939 // CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
3940 // CHECK2-32-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
3941 // CHECK2-32-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
3942 // CHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
3943 // CHECK2-32-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
3944 // CHECK2-32-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
3945 // CHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
3946 // CHECK2-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
3947 // CHECK2-32-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
3948 // CHECK2-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
3949 // CHECK2-32-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
3950 // CHECK2-32-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
3951 // CHECK2-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
3952 // CHECK2-32-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
3953 // CHECK2-32-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
3954 // CHECK2-32-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
3955 // CHECK2-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
3956 // CHECK2-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
3957 // CHECK2-32-NEXT:    store i64 1, i64* [[X]], align 4
3958 // CHECK2-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
3959 // CHECK2-32-NEXT:    store i8 1, i8* [[Y]], align 4
3960 // CHECK2-32-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
3961 // CHECK2-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
3962 // CHECK2-32-NEXT:    ret void
3963 // CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
3964 // CHECK2-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
3965 // CHECK2-32-NEXT:  entry:
3966 // CHECK2-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
3967 // CHECK2-32-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
3968 // CHECK2-32-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
3969 // CHECK2-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
3970 // CHECK2-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
3971 // CHECK2-32-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
3972 // CHECK2-32-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
3973 // CHECK2-32-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
3974 // CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
3975 // CHECK2-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
3976 // CHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
3977 // CHECK2-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
3978 // CHECK2-32-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
3979 // CHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
3980 // CHECK2-32-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
3981 // CHECK2-32-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
3982 // CHECK2-32-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
3983 // CHECK2-32-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
3984 // CHECK2-32-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
3985 // CHECK2-32-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 4
3986 // CHECK2-32-NEXT:    ret void
3987 // CHECK2-32-LABEL: define {{[^@]+}}@_Z3bariPd
3988 // CHECK2-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
3989 // CHECK2-32-NEXT:  entry:
3990 // CHECK2-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
3991 // CHECK2-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
3992 // CHECK2-32-NEXT:    [[A:%.*]] = alloca i32, align 4
3993 // CHECK2-32-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
3994 // CHECK2-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
3995 // CHECK2-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
3996 // CHECK2-32-NEXT:    store i32 0, i32* [[A]], align 4
3997 // CHECK2-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
3998 // CHECK2-32-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
3999 // CHECK2-32-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
4000 // CHECK2-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
4001 // CHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
4002 // CHECK2-32-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
4003 // CHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
4004 // CHECK2-32-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
4005 // CHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
4006 // CHECK2-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
4007 // CHECK2-32-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
4008 // CHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
4009 // CHECK2-32-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
4010 // CHECK2-32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
4011 // CHECK2-32-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
4012 // CHECK2-32-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
4013 // CHECK2-32-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
4014 // CHECK2-32-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
4015 // CHECK2-32-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
4016 // CHECK2-32-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
4017 // CHECK2-32-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
4018 // CHECK2-32-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
4019 // CHECK2-32-NEXT:    ret i32 [[TMP9]]
4020 // CHECK2-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
4021 // CHECK2-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
4022 // CHECK2-32-NEXT:  entry:
4023 // CHECK2-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
4024 // CHECK2-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
4025 // CHECK2-32-NEXT:    [[B:%.*]] = alloca i32, align 4
4026 // CHECK2-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
4027 // CHECK2-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
4028 // CHECK2-32-NEXT:    [[B_CASTED:%.*]] = alloca i32, align 4
4029 // CHECK2-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
4030 // CHECK2-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
4031 // CHECK2-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
4032 // CHECK2-32-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
4033 // CHECK2-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
4034 // CHECK2-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
4035 // CHECK2-32-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
4036 // CHECK2-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
4037 // CHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
4038 // CHECK2-32-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
4039 // CHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
4040 // CHECK2-32-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
4041 // CHECK2-32-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
4042 // CHECK2-32-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
4043 // CHECK2-32-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
4044 // CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
4045 // CHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 4
4046 // CHECK2-32-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
4047 // CHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
4048 // CHECK2-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
4049 // CHECK2-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
4050 // CHECK2-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
4051 // CHECK2-32-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
4052 // CHECK2-32-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
4053 // CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
4054 // CHECK2-32-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
4055 // CHECK2-32-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
4056 // CHECK2-32-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
4057 // CHECK2-32-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
4058 // CHECK2-32-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
4059 // CHECK2-32-NEXT:    store double* [[A]], double** [[TMP13]], align 4
4060 // CHECK2-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
4061 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP14]], align 4
4062 // CHECK2-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
4063 // CHECK2-32-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
4064 // CHECK2-32-NEXT:    store i32 [[TMP5]], i32* [[TMP16]], align 4
4065 // CHECK2-32-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
4066 // CHECK2-32-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
4067 // CHECK2-32-NEXT:    store i32 [[TMP5]], i32* [[TMP18]], align 4
4068 // CHECK2-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
4069 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP19]], align 4
4070 // CHECK2-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
4071 // CHECK2-32-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
4072 // CHECK2-32-NEXT:    store i32 2, i32* [[TMP21]], align 4
4073 // CHECK2-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
4074 // CHECK2-32-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
4075 // CHECK2-32-NEXT:    store i32 2, i32* [[TMP23]], align 4
4076 // CHECK2-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
4077 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP24]], align 4
4078 // CHECK2-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
4079 // CHECK2-32-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
4080 // CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[TMP26]], align 4
4081 // CHECK2-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
4082 // CHECK2-32-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
4083 // CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
4084 // CHECK2-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
4085 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP29]], align 4
4086 // CHECK2-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
4087 // CHECK2-32-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
4088 // CHECK2-32-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 4
4089 // CHECK2-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
4090 // CHECK2-32-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
4091 // CHECK2-32-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 4
4092 // CHECK2-32-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
4093 // CHECK2-32-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 4
4094 // CHECK2-32-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
4095 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP35]], align 4
4096 // CHECK2-32-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
4097 // CHECK2-32-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
4098 // CHECK2-32-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
4099 // CHECK2-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
4100 // CHECK2-32-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
4101 // CHECK2-32-NEXT:    store i32 2, i32* [[TMP39]], align 4
4102 // CHECK2-32-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
4103 // CHECK2-32-NEXT:    store i32 5, i32* [[TMP40]], align 4
4104 // CHECK2-32-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
4105 // CHECK2-32-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 4
4106 // CHECK2-32-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
4107 // CHECK2-32-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 4
4108 // CHECK2-32-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
4109 // CHECK2-32-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 4
4110 // CHECK2-32-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
4111 // CHECK2-32-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
4112 // CHECK2-32-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
4113 // CHECK2-32-NEXT:    store i8** null, i8*** [[TMP45]], align 4
4114 // CHECK2-32-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
4115 // CHECK2-32-NEXT:    store i8** null, i8*** [[TMP46]], align 4
4116 // CHECK2-32-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
4117 // CHECK2-32-NEXT:    store i64 0, i64* [[TMP47]], align 8
4118 // CHECK2-32-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
4119 // CHECK2-32-NEXT:    store i64 0, i64* [[TMP48]], align 8
4120 // CHECK2-32-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
4121 // CHECK2-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
4122 // CHECK2-32-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
4123 // CHECK2-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
4124 // CHECK2-32-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
4125 // CHECK2-32-NEXT:    store i32 0, i32* [[TMP51]], align 4
4126 // CHECK2-32-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
4127 // CHECK2-32-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
4128 // CHECK2-32-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
4129 // CHECK2-32:       omp_offload.failed:
4130 // CHECK2-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
4131 // CHECK2-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
4132 // CHECK2-32:       omp_offload.cont:
4133 // CHECK2-32-NEXT:    [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
4134 // CHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
4135 // CHECK2-32-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
4136 // CHECK2-32-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
4137 // CHECK2-32-NEXT:    [[CONV:%.*]] = sext i16 [[TMP55]] to i32
4138 // CHECK2-32-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
4139 // CHECK2-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
4140 // CHECK2-32-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
4141 // CHECK2-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
4142 // CHECK2-32-NEXT:    ret i32 [[ADD3]]
4143 // CHECK2-32-LABEL: define {{[^@]+}}@_ZL7fstatici
4144 // CHECK2-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
4145 // CHECK2-32-NEXT:  entry:
4146 // CHECK2-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
4147 // CHECK2-32-NEXT:    [[A:%.*]] = alloca i32, align 4
4148 // CHECK2-32-NEXT:    [[AAA:%.*]] = alloca i8, align 1
4149 // CHECK2-32-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
4150 // CHECK2-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
4151 // CHECK2-32-NEXT:    [[AAA_CASTED:%.*]] = alloca i32, align 4
4152 // CHECK2-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
4153 // CHECK2-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
4154 // CHECK2-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
4155 // CHECK2-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
4156 // CHECK2-32-NEXT:    store i32 0, i32* [[A]], align 4
4157 // CHECK2-32-NEXT:    store i8 0, i8* [[AAA]], align 1
4158 // CHECK2-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
4159 // CHECK2-32-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
4160 // CHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
4161 // CHECK2-32-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
4162 // CHECK2-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
4163 // CHECK2-32-NEXT:    store i8 [[TMP2]], i8* [[CONV]], align 1
4164 // CHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
4165 // CHECK2-32-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
4166 // CHECK2-32-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
4167 // CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
4168 // CHECK2-32-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
4169 // CHECK2-32-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
4170 // CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[TMP7]], align 4
4171 // CHECK2-32-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
4172 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP8]], align 4
4173 // CHECK2-32-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
4174 // CHECK2-32-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
4175 // CHECK2-32-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
4176 // CHECK2-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
4177 // CHECK2-32-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
4178 // CHECK2-32-NEXT:    store i32 [[TMP3]], i32* [[TMP12]], align 4
4179 // CHECK2-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
4180 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP13]], align 4
4181 // CHECK2-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
4182 // CHECK2-32-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
4183 // CHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
4184 // CHECK2-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
4185 // CHECK2-32-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
4186 // CHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
4187 // CHECK2-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
4188 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP18]], align 4
4189 // CHECK2-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
4190 // CHECK2-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
4191 // CHECK2-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
4192 // CHECK2-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
4193 // CHECK2-32-NEXT:    store i32 2, i32* [[TMP21]], align 4
4194 // CHECK2-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
4195 // CHECK2-32-NEXT:    store i32 3, i32* [[TMP22]], align 4
4196 // CHECK2-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
4197 // CHECK2-32-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 4
4198 // CHECK2-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
4199 // CHECK2-32-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 4
4200 // CHECK2-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
4201 // CHECK2-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
4202 // CHECK2-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
4203 // CHECK2-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
4204 // CHECK2-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
4205 // CHECK2-32-NEXT:    store i8** null, i8*** [[TMP27]], align 4
4206 // CHECK2-32-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
4207 // CHECK2-32-NEXT:    store i8** null, i8*** [[TMP28]], align 4
4208 // CHECK2-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
4209 // CHECK2-32-NEXT:    store i64 0, i64* [[TMP29]], align 8
4210 // CHECK2-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
4211 // CHECK2-32-NEXT:    store i64 0, i64* [[TMP30]], align 8
4212 // CHECK2-32-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
4213 // CHECK2-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
4214 // CHECK2-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
4215 // CHECK2-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
4216 // CHECK2-32-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
4217 // CHECK2-32-NEXT:    store i32 0, i32* [[TMP33]], align 4
4218 // CHECK2-32-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
4219 // CHECK2-32-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
4220 // CHECK2-32-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
4221 // CHECK2-32:       omp_offload.failed:
4222 // CHECK2-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
4223 // CHECK2-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
4224 // CHECK2-32:       omp_offload.cont:
4225 // CHECK2-32-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
4226 // CHECK2-32-NEXT:    ret i32 [[TMP36]]
4227 // CHECK2-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
4228 // CHECK2-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
4229 // CHECK2-32-NEXT:  entry:
4230 // CHECK2-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
4231 // CHECK2-32-NEXT:    [[A:%.*]] = alloca i32, align 4
4232 // CHECK2-32-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
4233 // CHECK2-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
4234 // CHECK2-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
4235 // CHECK2-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
4236 // CHECK2-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
4237 // CHECK2-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
4238 // CHECK2-32-NEXT:    store i32 0, i32* [[A]], align 4
4239 // CHECK2-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
4240 // CHECK2-32-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
4241 // CHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
4242 // CHECK2-32-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
4243 // CHECK2-32-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
4244 // CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[TMP3]], align 4
4245 // CHECK2-32-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
4246 // CHECK2-32-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
4247 // CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
4248 // CHECK2-32-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
4249 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP6]], align 4
4250 // CHECK2-32-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
4251 // CHECK2-32-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
4252 // CHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
4253 // CHECK2-32-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
4254 // CHECK2-32-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
4255 // CHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
4256 // CHECK2-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
4257 // CHECK2-32-NEXT:    store i8* null, i8** [[TMP11]], align 4
4258 // CHECK2-32-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
4259 // CHECK2-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
4260 // CHECK2-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
4261 // CHECK2-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
4262 // CHECK2-32-NEXT:    store i32 2, i32* [[TMP14]], align 4
4263 // CHECK2-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
4264 // CHECK2-32-NEXT:    store i32 2, i32* [[TMP15]], align 4
4265 // CHECK2-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
4266 // CHECK2-32-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 4
4267 // CHECK2-32-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
4268 // CHECK2-32-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 4
4269 // CHECK2-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
4270 // CHECK2-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
4271 // CHECK2-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
4272 // CHECK2-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
4273 // CHECK2-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
4274 // CHECK2-32-NEXT:    store i8** null, i8*** [[TMP20]], align 4
4275 // CHECK2-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
4276 // CHECK2-32-NEXT:    store i8** null, i8*** [[TMP21]], align 4
4277 // CHECK2-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
4278 // CHECK2-32-NEXT:    store i64 0, i64* [[TMP22]], align 8
4279 // CHECK2-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
4280 // CHECK2-32-NEXT:    store i64 0, i64* [[TMP23]], align 8
4281 // CHECK2-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
4282 // CHECK2-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
4283 // CHECK2-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
4284 // CHECK2-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
4285 // CHECK2-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
4286 // CHECK2-32-NEXT:    store i32 0, i32* [[TMP26]], align 4
4287 // CHECK2-32-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
4288 // CHECK2-32-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
4289 // CHECK2-32-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
4290 // CHECK2-32:       omp_offload.failed:
4291 // CHECK2-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
4292 // CHECK2-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
4293 // CHECK2-32:       omp_offload.cont:
4294 // CHECK2-32-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
4295 // CHECK2-32-NEXT:    ret i32 [[TMP29]]
4296 // CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
4297 // CHECK2-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
4298 // CHECK2-32-NEXT:  entry:
4299 // CHECK2-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
4300 // CHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
4301 // CHECK2-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
4302 // CHECK2-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
4303 // CHECK2-32-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
4304 // CHECK2-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
4305 // CHECK2-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
4306 // CHECK2-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
4307 // CHECK2-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
4308 // CHECK2-32-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
4309 // CHECK2-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
4310 // CHECK2-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
4311 // CHECK2-32-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
4312 // CHECK2-32-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
4313 // CHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
4314 // CHECK2-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
4315 // CHECK2-32-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
4316 // CHECK2-32-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
4317 // CHECK2-32-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
4318 // CHECK2-32-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
4319 // CHECK2-32-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
4320 // CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
4321 // CHECK2-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
4322 // CHECK2-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
4323 // CHECK2-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
4324 // CHECK2-32-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
4325 // CHECK2-32-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
4326 // CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
4327 // CHECK2-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
4328 // CHECK2-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
4329 // CHECK2-32-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
4330 // CHECK2-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
4331 // CHECK2-32-NEXT:    store double [[ADD]], double* [[A]], align 4
4332 // CHECK2-32-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
4333 // CHECK2-32-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
4334 // CHECK2-32-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
4335 // CHECK2-32-NEXT:    store double [[INC]], double* [[A4]], align 4
4336 // CHECK2-32-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
4337 // CHECK2-32-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
4338 // CHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
4339 // CHECK2-32-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
4340 // CHECK2-32-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
4341 // CHECK2-32-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
4342 // CHECK2-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
4343 // CHECK2-32-NEXT:    ret void
4344 // CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
4345 // CHECK2-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
4346 // CHECK2-32-NEXT:  entry:
4347 // CHECK2-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
4348 // CHECK2-32-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
4349 // CHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
4350 // CHECK2-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
4351 // CHECK2-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
4352 // CHECK2-32-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
4353 // CHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
4354 // CHECK2-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
4355 // CHECK2-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
4356 // CHECK2-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
4357 // CHECK2-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
4358 // CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
4359 // CHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
4360 // CHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
4361 // CHECK2-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
4362 // CHECK2-32-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
4363 // CHECK2-32-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
4364 // CHECK2-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
4365 // CHECK2-32-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
4366 // CHECK2-32-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
4367 // CHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
4368 // CHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
4369 // CHECK2-32-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
4370 // CHECK2-32-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
4371 // CHECK2-32-NEXT:    ret void
4372 // CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
4373 // CHECK2-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
4374 // CHECK2-32-NEXT:  entry:
4375 // CHECK2-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
4376 // CHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
4377 // CHECK2-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
4378 // CHECK2-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
4379 // CHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
4380 // CHECK2-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
4381 // CHECK2-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
4382 // CHECK2-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
4383 // CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
4384 // CHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
4385 // CHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
4386 // CHECK2-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
4387 // CHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
4388 // CHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
4389 // CHECK2-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
4390 // CHECK2-32-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
4391 // CHECK2-32-NEXT:    ret void
4392 // CHECK2-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
4393 // CHECK2-32-SAME: () #[[ATTR5:[0-9]+]] {
4394 // CHECK2-32-NEXT:  entry:
4395 // CHECK2-32-NEXT:    call void @__tgt_register_requires(i64 1)
4396 // CHECK2-32-NEXT:    ret void
4397 // CHECK3-32-LABEL: define {{[^@]+}}@_Z3fooiPd
4398 // CHECK3-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
4399 // CHECK3-32-NEXT:  entry:
4400 // CHECK3-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
4401 // CHECK3-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
4402 // CHECK3-32-NEXT:    [[A:%.*]] = alloca i32, align 4
4403 // CHECK3-32-NEXT:    [[AA:%.*]] = alloca i16, align 2
4404 // CHECK3-32-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
4405 // CHECK3-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
4406 // CHECK3-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
4407 // CHECK3-32-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
4408 // CHECK3-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
4409 // CHECK3-32-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
4410 // CHECK3-32-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
4411 // CHECK3-32-NEXT:    [[P:%.*]] = alloca i32*, align 64
4412 // CHECK3-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
4413 // CHECK3-32-NEXT:    [[GA_CASTED:%.*]] = alloca i32, align 4
4414 // CHECK3-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
4415 // CHECK3-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
4416 // CHECK3-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
4417 // CHECK3-32-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
4418 // CHECK3-32-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
4419 // CHECK3-32-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
4420 // CHECK3-32-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
4421 // CHECK3-32-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
4422 // CHECK3-32-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
4423 // CHECK3-32-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
4424 // CHECK3-32-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
4425 // CHECK3-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
4426 // CHECK3-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
4427 // CHECK3-32-NEXT:    store i32 0, i32* [[A]], align 4
4428 // CHECK3-32-NEXT:    store i16 0, i16* [[AA]], align 2
4429 // CHECK3-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
4430 // CHECK3-32-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave()
4431 // CHECK3-32-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
4432 // CHECK3-32-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
4433 // CHECK3-32-NEXT:    store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
4434 // CHECK3-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
4435 // CHECK3-32-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
4436 // CHECK3-32-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
4437 // CHECK3-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
4438 // CHECK3-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
4439 // CHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
4440 // CHECK3-32-NEXT:    store i32 [[TMP4]], i32* [[X]], align 4
4441 // CHECK3-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
4442 // CHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
4443 // CHECK3-32-NEXT:    store i32 [[TMP5]], i32* [[Y]], align 4
4444 // CHECK3-32-NEXT:    store i32* [[A]], i32** [[P]], align 64
4445 // CHECK3-32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
4446 // CHECK3-32-NEXT:    store i32 [[TMP6]], i32* [[A_CASTED]], align 4
4447 // CHECK3-32-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
4448 // CHECK3-32-NEXT:    [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
4449 // CHECK3-32-NEXT:    [[TMP9:%.*]] = load i32, i32* @ga, align 4
4450 // CHECK3-32-NEXT:    store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
4451 // CHECK3-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
4452 // CHECK3-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
4453 // CHECK3-32-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
4454 // CHECK3-32-NEXT:    store i32 [[TMP7]], i32* [[TMP12]], align 4
4455 // CHECK3-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
4456 // CHECK3-32-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
4457 // CHECK3-32-NEXT:    store i32 [[TMP7]], i32* [[TMP14]], align 4
4458 // CHECK3-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
4459 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP15]], align 4
4460 // CHECK3-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
4461 // CHECK3-32-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
4462 // CHECK3-32-NEXT:    store i32* [[TMP8]], i32** [[TMP17]], align 4
4463 // CHECK3-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
4464 // CHECK3-32-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
4465 // CHECK3-32-NEXT:    store i32* [[TMP8]], i32** [[TMP19]], align 4
4466 // CHECK3-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
4467 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP20]], align 4
4468 // CHECK3-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
4469 // CHECK3-32-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
4470 // CHECK3-32-NEXT:    store i32 [[TMP10]], i32* [[TMP22]], align 4
4471 // CHECK3-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
4472 // CHECK3-32-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
4473 // CHECK3-32-NEXT:    store i32 [[TMP10]], i32* [[TMP24]], align 4
4474 // CHECK3-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
4475 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP25]], align 4
4476 // CHECK3-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
4477 // CHECK3-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
4478 // CHECK3-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
4479 // CHECK3-32-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
4480 // CHECK3-32-NEXT:    store i32 2, i32* [[TMP28]], align 4
4481 // CHECK3-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
4482 // CHECK3-32-NEXT:    store i32 3, i32* [[TMP29]], align 4
4483 // CHECK3-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
4484 // CHECK3-32-NEXT:    store i8** [[TMP26]], i8*** [[TMP30]], align 4
4485 // CHECK3-32-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
4486 // CHECK3-32-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
4487 // CHECK3-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
4488 // CHECK3-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
4489 // CHECK3-32-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
4490 // CHECK3-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
4491 // CHECK3-32-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
4492 // CHECK3-32-NEXT:    store i8** null, i8*** [[TMP34]], align 4
4493 // CHECK3-32-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
4494 // CHECK3-32-NEXT:    store i8** null, i8*** [[TMP35]], align 4
4495 // CHECK3-32-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
4496 // CHECK3-32-NEXT:    store i64 0, i64* [[TMP36]], align 8
4497 // CHECK3-32-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
4498 // CHECK3-32-NEXT:    store i64 0, i64* [[TMP37]], align 8
4499 // CHECK3-32-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
4500 // CHECK3-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
4501 // CHECK3-32-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
4502 // CHECK3-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
4503 // CHECK3-32-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
4504 // CHECK3-32-NEXT:    store i32 0, i32* [[TMP40]], align 4
4505 // CHECK3-32-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
4506 // CHECK3-32-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
4507 // CHECK3-32-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
4508 // CHECK3-32:       omp_offload.failed:
4509 // CHECK3-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
4510 // CHECK3-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
4511 // CHECK3-32:       omp_offload.cont:
4512 // CHECK3-32-NEXT:    [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
4513 // CHECK3-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
4514 // CHECK3-32-NEXT:    store i16 [[TMP43]], i16* [[CONV]], align 2
4515 // CHECK3-32-NEXT:    [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
4516 // CHECK3-32-NEXT:    [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
4517 // CHECK3-32-NEXT:    [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
4518 // CHECK3-32-NEXT:    [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
4519 // CHECK3-32-NEXT:    [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
4520 // CHECK3-32-NEXT:    [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
4521 // CHECK3-32-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
4522 // CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
4523 // CHECK3-32-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
4524 // CHECK3-32-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
4525 // CHECK3-32-NEXT:    store i32 [[TMP44]], i32* [[TMP52]], align 4
4526 // CHECK3-32-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
4527 // CHECK3-32-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
4528 // CHECK3-32-NEXT:    store i32 [[TMP44]], i32* [[TMP54]], align 4
4529 // CHECK3-32-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
4530 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP55]], align 4
4531 // CHECK3-32-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
4532 // CHECK3-32-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
4533 // CHECK3-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
4534 // CHECK3-32-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
4535 // CHECK3-32-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
4536 // CHECK3-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
4537 // CHECK3-32-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
4538 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP60]], align 4
4539 // CHECK3-32-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
4540 // CHECK3-32-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
4541 // CHECK3-32-NEXT:    store i32 [[TMP0]], i32* [[TMP62]], align 4
4542 // CHECK3-32-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
4543 // CHECK3-32-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
4544 // CHECK3-32-NEXT:    store i32 [[TMP0]], i32* [[TMP64]], align 4
4545 // CHECK3-32-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
4546 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP65]], align 4
4547 // CHECK3-32-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
4548 // CHECK3-32-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
4549 // CHECK3-32-NEXT:    store float* [[VLA]], float** [[TMP67]], align 4
4550 // CHECK3-32-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
4551 // CHECK3-32-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
4552 // CHECK3-32-NEXT:    store float* [[VLA]], float** [[TMP69]], align 4
4553 // CHECK3-32-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
4554 // CHECK3-32-NEXT:    store i64 [[TMP46]], i64* [[TMP70]], align 4
4555 // CHECK3-32-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
4556 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP71]], align 4
4557 // CHECK3-32-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
4558 // CHECK3-32-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
4559 // CHECK3-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
4560 // CHECK3-32-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
4561 // CHECK3-32-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
4562 // CHECK3-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
4563 // CHECK3-32-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
4564 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP76]], align 4
4565 // CHECK3-32-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
4566 // CHECK3-32-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
4567 // CHECK3-32-NEXT:    store i32 5, i32* [[TMP78]], align 4
4568 // CHECK3-32-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
4569 // CHECK3-32-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
4570 // CHECK3-32-NEXT:    store i32 5, i32* [[TMP80]], align 4
4571 // CHECK3-32-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
4572 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP81]], align 4
4573 // CHECK3-32-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
4574 // CHECK3-32-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
4575 // CHECK3-32-NEXT:    store i32 [[TMP2]], i32* [[TMP83]], align 4
4576 // CHECK3-32-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
4577 // CHECK3-32-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
4578 // CHECK3-32-NEXT:    store i32 [[TMP2]], i32* [[TMP85]], align 4
4579 // CHECK3-32-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
4580 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP86]], align 4
4581 // CHECK3-32-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
4582 // CHECK3-32-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
4583 // CHECK3-32-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 4
4584 // CHECK3-32-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
4585 // CHECK3-32-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
4586 // CHECK3-32-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 4
4587 // CHECK3-32-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
4588 // CHECK3-32-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 4
4589 // CHECK3-32-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
4590 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP92]], align 4
4591 // CHECK3-32-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
4592 // CHECK3-32-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
4593 // CHECK3-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
4594 // CHECK3-32-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
4595 // CHECK3-32-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
4596 // CHECK3-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
4597 // CHECK3-32-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
4598 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP97]], align 4
4599 // CHECK3-32-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
4600 // CHECK3-32-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
4601 // CHECK3-32-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
4602 // CHECK3-32-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
4603 // CHECK3-32-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
4604 // CHECK3-32-NEXT:    store i32 2, i32* [[TMP101]], align 4
4605 // CHECK3-32-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
4606 // CHECK3-32-NEXT:    store i32 9, i32* [[TMP102]], align 4
4607 // CHECK3-32-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
4608 // CHECK3-32-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 4
4609 // CHECK3-32-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
4610 // CHECK3-32-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 4
4611 // CHECK3-32-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
4612 // CHECK3-32-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 4
4613 // CHECK3-32-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
4614 // CHECK3-32-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
4615 // CHECK3-32-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
4616 // CHECK3-32-NEXT:    store i8** null, i8*** [[TMP107]], align 4
4617 // CHECK3-32-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
4618 // CHECK3-32-NEXT:    store i8** null, i8*** [[TMP108]], align 4
4619 // CHECK3-32-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
4620 // CHECK3-32-NEXT:    store i64 0, i64* [[TMP109]], align 8
4621 // CHECK3-32-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
4622 // CHECK3-32-NEXT:    store i64 0, i64* [[TMP110]], align 8
4623 // CHECK3-32-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
4624 // CHECK3-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
4625 // CHECK3-32-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
4626 // CHECK3-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
4627 // CHECK3-32-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
4628 // CHECK3-32-NEXT:    store i32 0, i32* [[TMP113]], align 4
4629 // CHECK3-32-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
4630 // CHECK3-32-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
4631 // CHECK3-32-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
4632 // CHECK3-32:       omp_offload.failed6:
4633 // CHECK3-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
4634 // CHECK3-32-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
4635 // CHECK3-32:       omp_offload.cont7:
4636 // CHECK3-32-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
4637 // CHECK3-32-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
4638 // CHECK3-32-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
4639 // CHECK3-32-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 4
4640 // CHECK3-32-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
4641 // CHECK3-32-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
4642 // CHECK3-32-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 4
4643 // CHECK3-32-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
4644 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP121]], align 4
4645 // CHECK3-32-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
4646 // CHECK3-32-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
4647 // CHECK3-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
4648 // CHECK3-32-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
4649 // CHECK3-32-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
4650 // CHECK3-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
4651 // CHECK3-32-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
4652 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP126]], align 4
4653 // CHECK3-32-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
4654 // CHECK3-32-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
4655 // CHECK3-32-NEXT:    [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
4656 // CHECK3-32-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
4657 // CHECK3-32-NEXT:    store i32 2, i32* [[TMP129]], align 4
4658 // CHECK3-32-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
4659 // CHECK3-32-NEXT:    store i32 2, i32* [[TMP130]], align 4
4660 // CHECK3-32-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
4661 // CHECK3-32-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 4
4662 // CHECK3-32-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
4663 // CHECK3-32-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 4
4664 // CHECK3-32-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
4665 // CHECK3-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
4666 // CHECK3-32-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
4667 // CHECK3-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
4668 // CHECK3-32-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
4669 // CHECK3-32-NEXT:    store i8** null, i8*** [[TMP135]], align 4
4670 // CHECK3-32-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
4671 // CHECK3-32-NEXT:    store i8** null, i8*** [[TMP136]], align 4
4672 // CHECK3-32-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
4673 // CHECK3-32-NEXT:    store i64 0, i64* [[TMP137]], align 8
4674 // CHECK3-32-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
4675 // CHECK3-32-NEXT:    store i64 0, i64* [[TMP138]], align 8
4676 // CHECK3-32-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
4677 // CHECK3-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
4678 // CHECK3-32-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
4679 // CHECK3-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
4680 // CHECK3-32-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
4681 // CHECK3-32-NEXT:    store i32 0, i32* [[TMP141]], align 4
4682 // CHECK3-32-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
4683 // CHECK3-32-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
4684 // CHECK3-32-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
4685 // CHECK3-32:       omp_offload.failed12:
4686 // CHECK3-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
4687 // CHECK3-32-NEXT:    br label [[OMP_OFFLOAD_CONT13]]
4688 // CHECK3-32:       omp_offload.cont13:
4689 // CHECK3-32-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
4690 // CHECK3-32-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
4691 // CHECK3-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
4692 // CHECK3-32-NEXT:    ret i32 [[TMP144]]
4693 // CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
4694 // CHECK3-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
4695 // CHECK3-32-NEXT:  entry:
4696 // CHECK3-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
4697 // CHECK3-32-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
4698 // CHECK3-32-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
4699 // CHECK3-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
4700 // CHECK3-32-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
4701 // CHECK3-32-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
4702 // CHECK3-32-NEXT:    ret void
4703 // CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
4704 // CHECK3-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
4705 // CHECK3-32-NEXT:  entry:
4706 // CHECK3-32-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
4707 // CHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
4708 // CHECK3-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
4709 // CHECK3-32-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
4710 // CHECK3-32-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
4711 // CHECK3-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
4712 // CHECK3-32-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
4713 // CHECK3-32-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
4714 // CHECK3-32-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
4715 // CHECK3-32-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
4716 // CHECK3-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
4717 // CHECK3-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
4718 // CHECK3-32-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
4719 // CHECK3-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
4720 // CHECK3-32-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
4721 // CHECK3-32-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
4722 // CHECK3-32-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
4723 // CHECK3-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
4724 // CHECK3-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
4725 // CHECK3-32-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
4726 // CHECK3-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
4727 // CHECK3-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
4728 // CHECK3-32-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
4729 // CHECK3-32-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
4730 // CHECK3-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
4731 // CHECK3-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
4732 // CHECK3-32-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
4733 // CHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
4734 // CHECK3-32-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
4735 // CHECK3-32-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
4736 // CHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
4737 // CHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
4738 // CHECK3-32-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
4739 // CHECK3-32-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
4740 // CHECK3-32-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
4741 // CHECK3-32-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
4742 // CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
4743 // CHECK3-32-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
4744 // CHECK3-32-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
4745 // CHECK3-32-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
4746 // CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
4747 // CHECK3-32-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
4748 // CHECK3-32-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
4749 // CHECK3-32-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
4750 // CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
4751 // CHECK3-32-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
4752 // CHECK3-32-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
4753 // CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
4754 // CHECK3-32-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
4755 // CHECK3-32-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
4756 // CHECK3-32-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
4757 // CHECK3-32-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
4758 // CHECK3-32-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
4759 // CHECK3-32-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
4760 // CHECK3-32-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
4761 // CHECK3-32-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
4762 // CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
4763 // CHECK3-32-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
4764 // CHECK3-32-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
4765 // CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
4766 // CHECK3-32-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
4767 // CHECK3-32-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
4768 // CHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
4769 // CHECK3-32-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
4770 // CHECK3-32-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
4771 // CHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
4772 // CHECK3-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
4773 // CHECK3-32-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
4774 // CHECK3-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
4775 // CHECK3-32-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
4776 // CHECK3-32-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
4777 // CHECK3-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
4778 // CHECK3-32-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
4779 // CHECK3-32-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
4780 // CHECK3-32-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
4781 // CHECK3-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
4782 // CHECK3-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
4783 // CHECK3-32-NEXT:    store i64 1, i64* [[X]], align 4
4784 // CHECK3-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
4785 // CHECK3-32-NEXT:    store i8 1, i8* [[Y]], align 4
4786 // CHECK3-32-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
4787 // CHECK3-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
4788 // CHECK3-32-NEXT:    ret void
4789 // CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
4790 // CHECK3-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
4791 // CHECK3-32-NEXT:  entry:
4792 // CHECK3-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
4793 // CHECK3-32-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
4794 // CHECK3-32-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
4795 // CHECK3-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
4796 // CHECK3-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
4797 // CHECK3-32-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
4798 // CHECK3-32-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
4799 // CHECK3-32-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
4800 // CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
4801 // CHECK3-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
4802 // CHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
4803 // CHECK3-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
4804 // CHECK3-32-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
4805 // CHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
4806 // CHECK3-32-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
4807 // CHECK3-32-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
4808 // CHECK3-32-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
4809 // CHECK3-32-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
4810 // CHECK3-32-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
4811 // CHECK3-32-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 4
4812 // CHECK3-32-NEXT:    ret void
4813 // CHECK3-32-LABEL: define {{[^@]+}}@_Z3bariPd
4814 // CHECK3-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
4815 // CHECK3-32-NEXT:  entry:
4816 // CHECK3-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
4817 // CHECK3-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
4818 // CHECK3-32-NEXT:    [[A:%.*]] = alloca i32, align 4
4819 // CHECK3-32-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
4820 // CHECK3-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
4821 // CHECK3-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
4822 // CHECK3-32-NEXT:    store i32 0, i32* [[A]], align 4
4823 // CHECK3-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
4824 // CHECK3-32-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
4825 // CHECK3-32-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
4826 // CHECK3-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
4827 // CHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
4828 // CHECK3-32-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
4829 // CHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
4830 // CHECK3-32-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
4831 // CHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
4832 // CHECK3-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
4833 // CHECK3-32-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
4834 // CHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
4835 // CHECK3-32-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
4836 // CHECK3-32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
4837 // CHECK3-32-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
4838 // CHECK3-32-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
4839 // CHECK3-32-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
4840 // CHECK3-32-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
4841 // CHECK3-32-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
4842 // CHECK3-32-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
4843 // CHECK3-32-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
4844 // CHECK3-32-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
4845 // CHECK3-32-NEXT:    ret i32 [[TMP9]]
4846 // CHECK3-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
4847 // CHECK3-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
4848 // CHECK3-32-NEXT:  entry:
4849 // CHECK3-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
4850 // CHECK3-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
4851 // CHECK3-32-NEXT:    [[B:%.*]] = alloca i32, align 4
4852 // CHECK3-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
4853 // CHECK3-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
4854 // CHECK3-32-NEXT:    [[B_CASTED:%.*]] = alloca i32, align 4
4855 // CHECK3-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
4856 // CHECK3-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
4857 // CHECK3-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
4858 // CHECK3-32-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
4859 // CHECK3-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
4860 // CHECK3-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
4861 // CHECK3-32-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
4862 // CHECK3-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
4863 // CHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
4864 // CHECK3-32-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
4865 // CHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
4866 // CHECK3-32-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
4867 // CHECK3-32-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
4868 // CHECK3-32-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
4869 // CHECK3-32-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
4870 // CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
4871 // CHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 4
4872 // CHECK3-32-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
4873 // CHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
4874 // CHECK3-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
4875 // CHECK3-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
4876 // CHECK3-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
4877 // CHECK3-32-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
4878 // CHECK3-32-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
4879 // CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
4880 // CHECK3-32-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
4881 // CHECK3-32-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
4882 // CHECK3-32-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
4883 // CHECK3-32-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
4884 // CHECK3-32-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
4885 // CHECK3-32-NEXT:    store double* [[A]], double** [[TMP13]], align 4
4886 // CHECK3-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
4887 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP14]], align 4
4888 // CHECK3-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
4889 // CHECK3-32-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
4890 // CHECK3-32-NEXT:    store i32 [[TMP5]], i32* [[TMP16]], align 4
4891 // CHECK3-32-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
4892 // CHECK3-32-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
4893 // CHECK3-32-NEXT:    store i32 [[TMP5]], i32* [[TMP18]], align 4
4894 // CHECK3-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
4895 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP19]], align 4
4896 // CHECK3-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
4897 // CHECK3-32-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
4898 // CHECK3-32-NEXT:    store i32 2, i32* [[TMP21]], align 4
4899 // CHECK3-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
4900 // CHECK3-32-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
4901 // CHECK3-32-NEXT:    store i32 2, i32* [[TMP23]], align 4
4902 // CHECK3-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
4903 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP24]], align 4
4904 // CHECK3-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
4905 // CHECK3-32-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
4906 // CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[TMP26]], align 4
4907 // CHECK3-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
4908 // CHECK3-32-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
4909 // CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
4910 // CHECK3-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
4911 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP29]], align 4
4912 // CHECK3-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
4913 // CHECK3-32-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
4914 // CHECK3-32-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 4
4915 // CHECK3-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
4916 // CHECK3-32-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
4917 // CHECK3-32-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 4
4918 // CHECK3-32-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
4919 // CHECK3-32-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 4
4920 // CHECK3-32-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
4921 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP35]], align 4
4922 // CHECK3-32-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
4923 // CHECK3-32-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
4924 // CHECK3-32-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
4925 // CHECK3-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
4926 // CHECK3-32-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
4927 // CHECK3-32-NEXT:    store i32 2, i32* [[TMP39]], align 4
4928 // CHECK3-32-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
4929 // CHECK3-32-NEXT:    store i32 5, i32* [[TMP40]], align 4
4930 // CHECK3-32-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
4931 // CHECK3-32-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 4
4932 // CHECK3-32-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
4933 // CHECK3-32-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 4
4934 // CHECK3-32-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
4935 // CHECK3-32-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 4
4936 // CHECK3-32-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
4937 // CHECK3-32-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
4938 // CHECK3-32-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
4939 // CHECK3-32-NEXT:    store i8** null, i8*** [[TMP45]], align 4
4940 // CHECK3-32-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
4941 // CHECK3-32-NEXT:    store i8** null, i8*** [[TMP46]], align 4
4942 // CHECK3-32-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
4943 // CHECK3-32-NEXT:    store i64 0, i64* [[TMP47]], align 8
4944 // CHECK3-32-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
4945 // CHECK3-32-NEXT:    store i64 0, i64* [[TMP48]], align 8
4946 // CHECK3-32-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
4947 // CHECK3-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
4948 // CHECK3-32-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
4949 // CHECK3-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
4950 // CHECK3-32-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
4951 // CHECK3-32-NEXT:    store i32 0, i32* [[TMP51]], align 4
4952 // CHECK3-32-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
4953 // CHECK3-32-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
4954 // CHECK3-32-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
4955 // CHECK3-32:       omp_offload.failed:
4956 // CHECK3-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
4957 // CHECK3-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
4958 // CHECK3-32:       omp_offload.cont:
4959 // CHECK3-32-NEXT:    [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
4960 // CHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
4961 // CHECK3-32-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
4962 // CHECK3-32-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
4963 // CHECK3-32-NEXT:    [[CONV:%.*]] = sext i16 [[TMP55]] to i32
4964 // CHECK3-32-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
4965 // CHECK3-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
4966 // CHECK3-32-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
4967 // CHECK3-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
4968 // CHECK3-32-NEXT:    ret i32 [[ADD3]]
4969 // CHECK3-32-LABEL: define {{[^@]+}}@_ZL7fstatici
4970 // CHECK3-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
4971 // CHECK3-32-NEXT:  entry:
4972 // CHECK3-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
4973 // CHECK3-32-NEXT:    [[A:%.*]] = alloca i32, align 4
4974 // CHECK3-32-NEXT:    [[AAA:%.*]] = alloca i8, align 1
4975 // CHECK3-32-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
4976 // CHECK3-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
4977 // CHECK3-32-NEXT:    [[AAA_CASTED:%.*]] = alloca i32, align 4
4978 // CHECK3-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
4979 // CHECK3-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
4980 // CHECK3-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
4981 // CHECK3-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
4982 // CHECK3-32-NEXT:    store i32 0, i32* [[A]], align 4
4983 // CHECK3-32-NEXT:    store i8 0, i8* [[AAA]], align 1
4984 // CHECK3-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
4985 // CHECK3-32-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
4986 // CHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
4987 // CHECK3-32-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
4988 // CHECK3-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
4989 // CHECK3-32-NEXT:    store i8 [[TMP2]], i8* [[CONV]], align 1
4990 // CHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
4991 // CHECK3-32-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
4992 // CHECK3-32-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
4993 // CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
4994 // CHECK3-32-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
4995 // CHECK3-32-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
4996 // CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[TMP7]], align 4
4997 // CHECK3-32-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
4998 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP8]], align 4
4999 // CHECK3-32-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
5000 // CHECK3-32-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
5001 // CHECK3-32-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
5002 // CHECK3-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
5003 // CHECK3-32-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
5004 // CHECK3-32-NEXT:    store i32 [[TMP3]], i32* [[TMP12]], align 4
5005 // CHECK3-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
5006 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP13]], align 4
5007 // CHECK3-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
5008 // CHECK3-32-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
5009 // CHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
5010 // CHECK3-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
5011 // CHECK3-32-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
5012 // CHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
5013 // CHECK3-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
5014 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP18]], align 4
5015 // CHECK3-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
5016 // CHECK3-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
5017 // CHECK3-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
5018 // CHECK3-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
5019 // CHECK3-32-NEXT:    store i32 2, i32* [[TMP21]], align 4
5020 // CHECK3-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
5021 // CHECK3-32-NEXT:    store i32 3, i32* [[TMP22]], align 4
5022 // CHECK3-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
5023 // CHECK3-32-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 4
5024 // CHECK3-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
5025 // CHECK3-32-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 4
5026 // CHECK3-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
5027 // CHECK3-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
5028 // CHECK3-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
5029 // CHECK3-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
5030 // CHECK3-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
5031 // CHECK3-32-NEXT:    store i8** null, i8*** [[TMP27]], align 4
5032 // CHECK3-32-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
5033 // CHECK3-32-NEXT:    store i8** null, i8*** [[TMP28]], align 4
5034 // CHECK3-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
5035 // CHECK3-32-NEXT:    store i64 0, i64* [[TMP29]], align 8
5036 // CHECK3-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
5037 // CHECK3-32-NEXT:    store i64 0, i64* [[TMP30]], align 8
5038 // CHECK3-32-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
5039 // CHECK3-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
5040 // CHECK3-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
5041 // CHECK3-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
5042 // CHECK3-32-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
5043 // CHECK3-32-NEXT:    store i32 0, i32* [[TMP33]], align 4
5044 // CHECK3-32-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
5045 // CHECK3-32-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
5046 // CHECK3-32-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
5047 // CHECK3-32:       omp_offload.failed:
5048 // CHECK3-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
5049 // CHECK3-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
5050 // CHECK3-32:       omp_offload.cont:
5051 // CHECK3-32-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
5052 // CHECK3-32-NEXT:    ret i32 [[TMP36]]
5053 // CHECK3-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
5054 // CHECK3-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
5055 // CHECK3-32-NEXT:  entry:
5056 // CHECK3-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
5057 // CHECK3-32-NEXT:    [[A:%.*]] = alloca i32, align 4
5058 // CHECK3-32-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
5059 // CHECK3-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
5060 // CHECK3-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
5061 // CHECK3-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
5062 // CHECK3-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
5063 // CHECK3-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
5064 // CHECK3-32-NEXT:    store i32 0, i32* [[A]], align 4
5065 // CHECK3-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
5066 // CHECK3-32-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
5067 // CHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
5068 // CHECK3-32-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
5069 // CHECK3-32-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
5070 // CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[TMP3]], align 4
5071 // CHECK3-32-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
5072 // CHECK3-32-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
5073 // CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
5074 // CHECK3-32-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
5075 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP6]], align 4
5076 // CHECK3-32-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
5077 // CHECK3-32-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
5078 // CHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
5079 // CHECK3-32-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
5080 // CHECK3-32-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
5081 // CHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
5082 // CHECK3-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
5083 // CHECK3-32-NEXT:    store i8* null, i8** [[TMP11]], align 4
5084 // CHECK3-32-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
5085 // CHECK3-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
5086 // CHECK3-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
5087 // CHECK3-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
5088 // CHECK3-32-NEXT:    store i32 2, i32* [[TMP14]], align 4
5089 // CHECK3-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
5090 // CHECK3-32-NEXT:    store i32 2, i32* [[TMP15]], align 4
5091 // CHECK3-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
5092 // CHECK3-32-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 4
5093 // CHECK3-32-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
5094 // CHECK3-32-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 4
5095 // CHECK3-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
5096 // CHECK3-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
5097 // CHECK3-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
5098 // CHECK3-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
5099 // CHECK3-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
5100 // CHECK3-32-NEXT:    store i8** null, i8*** [[TMP20]], align 4
5101 // CHECK3-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
5102 // CHECK3-32-NEXT:    store i8** null, i8*** [[TMP21]], align 4
5103 // CHECK3-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
5104 // CHECK3-32-NEXT:    store i64 0, i64* [[TMP22]], align 8
5105 // CHECK3-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
5106 // CHECK3-32-NEXT:    store i64 0, i64* [[TMP23]], align 8
5107 // CHECK3-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
5108 // CHECK3-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
5109 // CHECK3-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
5110 // CHECK3-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
5111 // CHECK3-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
5112 // CHECK3-32-NEXT:    store i32 0, i32* [[TMP26]], align 4
5113 // CHECK3-32-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
5114 // CHECK3-32-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
5115 // CHECK3-32-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
5116 // CHECK3-32:       omp_offload.failed:
5117 // CHECK3-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
5118 // CHECK3-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
5119 // CHECK3-32:       omp_offload.cont:
5120 // CHECK3-32-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
5121 // CHECK3-32-NEXT:    ret i32 [[TMP29]]
5122 // CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
5123 // CHECK3-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
5124 // CHECK3-32-NEXT:  entry:
5125 // CHECK3-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
5126 // CHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
5127 // CHECK3-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
5128 // CHECK3-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
5129 // CHECK3-32-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
5130 // CHECK3-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
5131 // CHECK3-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
5132 // CHECK3-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
5133 // CHECK3-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
5134 // CHECK3-32-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
5135 // CHECK3-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
5136 // CHECK3-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
5137 // CHECK3-32-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
5138 // CHECK3-32-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
5139 // CHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
5140 // CHECK3-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
5141 // CHECK3-32-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
5142 // CHECK3-32-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
5143 // CHECK3-32-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
5144 // CHECK3-32-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
5145 // CHECK3-32-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
5146 // CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
5147 // CHECK3-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
5148 // CHECK3-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
5149 // CHECK3-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
5150 // CHECK3-32-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
5151 // CHECK3-32-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
5152 // CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
5153 // CHECK3-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
5154 // CHECK3-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
5155 // CHECK3-32-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
5156 // CHECK3-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
5157 // CHECK3-32-NEXT:    store double [[ADD]], double* [[A]], align 4
5158 // CHECK3-32-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
5159 // CHECK3-32-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
5160 // CHECK3-32-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
5161 // CHECK3-32-NEXT:    store double [[INC]], double* [[A4]], align 4
5162 // CHECK3-32-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
5163 // CHECK3-32-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
5164 // CHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
5165 // CHECK3-32-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
5166 // CHECK3-32-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
5167 // CHECK3-32-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
5168 // CHECK3-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
5169 // CHECK3-32-NEXT:    ret void
5170 // CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
5171 // CHECK3-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
5172 // CHECK3-32-NEXT:  entry:
5173 // CHECK3-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
5174 // CHECK3-32-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
5175 // CHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
5176 // CHECK3-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
5177 // CHECK3-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
5178 // CHECK3-32-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
5179 // CHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
5180 // CHECK3-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
5181 // CHECK3-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
5182 // CHECK3-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
5183 // CHECK3-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
5184 // CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
5185 // CHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
5186 // CHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
5187 // CHECK3-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
5188 // CHECK3-32-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
5189 // CHECK3-32-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
5190 // CHECK3-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
5191 // CHECK3-32-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
5192 // CHECK3-32-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
5193 // CHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
5194 // CHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
5195 // CHECK3-32-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
5196 // CHECK3-32-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
5197 // CHECK3-32-NEXT:    ret void
5198 // CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
5199 // CHECK3-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
5200 // CHECK3-32-NEXT:  entry:
5201 // CHECK3-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
5202 // CHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
5203 // CHECK3-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
5204 // CHECK3-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
5205 // CHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
5206 // CHECK3-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
5207 // CHECK3-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
5208 // CHECK3-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
5209 // CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
5210 // CHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
5211 // CHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
5212 // CHECK3-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
5213 // CHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
5214 // CHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
5215 // CHECK3-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
5216 // CHECK3-32-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
5217 // CHECK3-32-NEXT:    ret void
5218 // CHECK3-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
5219 // CHECK3-32-SAME: () #[[ATTR5:[0-9]+]] {
5220 // CHECK3-32-NEXT:  entry:
5221 // CHECK3-32-NEXT:    call void @__tgt_register_requires(i64 1)
5222 // CHECK3-32-NEXT:    ret void
5223 // TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
5224 // TCHECK-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
5225 // TCHECK-64-NEXT:  entry:
5226 // TCHECK-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
5227 // TCHECK-64-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
5228 // TCHECK-64-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
5229 // TCHECK-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
5230 // TCHECK-64-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
5231 // TCHECK-64-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
5232 // TCHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
5233 // TCHECK-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
5234 // TCHECK-64-NEXT:    ret void
5235 // TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
5236 // TCHECK-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
5237 // TCHECK-64-NEXT:  entry:
5238 // TCHECK-64-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
5239 // TCHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
5240 // TCHECK-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
5241 // TCHECK-64-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
5242 // TCHECK-64-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
5243 // TCHECK-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
5244 // TCHECK-64-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
5245 // TCHECK-64-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
5246 // TCHECK-64-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
5247 // TCHECK-64-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
5248 // TCHECK-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
5249 // TCHECK-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
5250 // TCHECK-64-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
5251 // TCHECK-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
5252 // TCHECK-64-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
5253 // TCHECK-64-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
5254 // TCHECK-64-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
5255 // TCHECK-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
5256 // TCHECK-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
5257 // TCHECK-64-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
5258 // TCHECK-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
5259 // TCHECK-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
5260 // TCHECK-64-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
5261 // TCHECK-64-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
5262 // TCHECK-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
5263 // TCHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
5264 // TCHECK-64-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
5265 // TCHECK-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
5266 // TCHECK-64-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
5267 // TCHECK-64-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
5268 // TCHECK-64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
5269 // TCHECK-64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
5270 // TCHECK-64-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
5271 // TCHECK-64-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
5272 // TCHECK-64-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
5273 // TCHECK-64-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
5274 // TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
5275 // TCHECK-64-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
5276 // TCHECK-64-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
5277 // TCHECK-64-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
5278 // TCHECK-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
5279 // TCHECK-64-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
5280 // TCHECK-64-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
5281 // TCHECK-64-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
5282 // TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
5283 // TCHECK-64-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
5284 // TCHECK-64-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
5285 // TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
5286 // TCHECK-64-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
5287 // TCHECK-64-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
5288 // TCHECK-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
5289 // TCHECK-64-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
5290 // TCHECK-64-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
5291 // TCHECK-64-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
5292 // TCHECK-64-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
5293 // TCHECK-64-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
5294 // TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
5295 // TCHECK-64-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
5296 // TCHECK-64-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
5297 // TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
5298 // TCHECK-64-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
5299 // TCHECK-64-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
5300 // TCHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
5301 // TCHECK-64-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
5302 // TCHECK-64-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
5303 // TCHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
5304 // TCHECK-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
5305 // TCHECK-64-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
5306 // TCHECK-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
5307 // TCHECK-64-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
5308 // TCHECK-64-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
5309 // TCHECK-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
5310 // TCHECK-64-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
5311 // TCHECK-64-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
5312 // TCHECK-64-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
5313 // TCHECK-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
5314 // TCHECK-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
5315 // TCHECK-64-NEXT:    store i64 1, i64* [[X]], align 8
5316 // TCHECK-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
5317 // TCHECK-64-NEXT:    store i8 1, i8* [[Y]], align 8
5318 // TCHECK-64-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
5319 // TCHECK-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
5320 // TCHECK-64-NEXT:    ret void
5321 // TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
5322 // TCHECK-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
5323 // TCHECK-64-NEXT:  entry:
5324 // TCHECK-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
5325 // TCHECK-64-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
5326 // TCHECK-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
5327 // TCHECK-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
5328 // TCHECK-64-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
5329 // TCHECK-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
5330 // TCHECK-64-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
5331 // TCHECK-64-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
5332 // TCHECK-64-NEXT:    [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8
5333 // TCHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0
5334 // TCHECK-64-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
5335 // TCHECK-64-NEXT:    [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8
5336 // TCHECK-64-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0
5337 // TCHECK-64-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8
5338 // TCHECK-64-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
5339 // TCHECK-64-NEXT:    store double [[INC]], double* [[ARRAYIDX1]], align 8
5340 // TCHECK-64-NEXT:    ret void
5341 // TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
5342 // TCHECK-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
5343 // TCHECK-64-NEXT:  entry:
5344 // TCHECK-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
5345 // TCHECK-64-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
5346 // TCHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
5347 // TCHECK-64-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
5348 // TCHECK-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
5349 // TCHECK-64-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
5350 // TCHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
5351 // TCHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
5352 // TCHECK-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
5353 // TCHECK-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
5354 // TCHECK-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
5355 // TCHECK-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
5356 // TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
5357 // TCHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
5358 // TCHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
5359 // TCHECK-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
5360 // TCHECK-64-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
5361 // TCHECK-64-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
5362 // TCHECK-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
5363 // TCHECK-64-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
5364 // TCHECK-64-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
5365 // TCHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
5366 // TCHECK-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
5367 // TCHECK-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
5368 // TCHECK-64-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
5369 // TCHECK-64-NEXT:    ret void
5370 // TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
5371 // TCHECK-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
5372 // TCHECK-64-NEXT:  entry:
5373 // TCHECK-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
5374 // TCHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
5375 // TCHECK-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
5376 // TCHECK-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
5377 // TCHECK-64-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
5378 // TCHECK-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
5379 // TCHECK-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
5380 // TCHECK-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
5381 // TCHECK-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
5382 // TCHECK-64-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
5383 // TCHECK-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
5384 // TCHECK-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
5385 // TCHECK-64-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
5386 // TCHECK-64-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
5387 // TCHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
5388 // TCHECK-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
5389 // TCHECK-64-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
5390 // TCHECK-64-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
5391 // TCHECK-64-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
5392 // TCHECK-64-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
5393 // TCHECK-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
5394 // TCHECK-64-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
5395 // TCHECK-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
5396 // TCHECK-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
5397 // TCHECK-64-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
5398 // TCHECK-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
5399 // TCHECK-64-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
5400 // TCHECK-64-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
5401 // TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
5402 // TCHECK-64-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
5403 // TCHECK-64-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
5404 // TCHECK-64-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
5405 // TCHECK-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
5406 // TCHECK-64-NEXT:    store double [[ADD]], double* [[A]], align 8
5407 // TCHECK-64-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
5408 // TCHECK-64-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
5409 // TCHECK-64-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
5410 // TCHECK-64-NEXT:    store double [[INC]], double* [[A5]], align 8
5411 // TCHECK-64-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
5412 // TCHECK-64-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
5413 // TCHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
5414 // TCHECK-64-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
5415 // TCHECK-64-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
5416 // TCHECK-64-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
5417 // TCHECK-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
5418 // TCHECK-64-NEXT:    ret void
5419 // TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
5420 // TCHECK-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
5421 // TCHECK-64-NEXT:  entry:
5422 // TCHECK-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
5423 // TCHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
5424 // TCHECK-64-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
5425 // TCHECK-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
5426 // TCHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
5427 // TCHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
5428 // TCHECK-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
5429 // TCHECK-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
5430 // TCHECK-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
5431 // TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
5432 // TCHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
5433 // TCHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
5434 // TCHECK-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
5435 // TCHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
5436 // TCHECK-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
5437 // TCHECK-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
5438 // TCHECK-64-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
5439 // TCHECK-64-NEXT:    ret void
5440 // TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
5441 // TCHECK1-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
5442 // TCHECK1-64-NEXT:  entry:
5443 // TCHECK1-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
5444 // TCHECK1-64-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
5445 // TCHECK1-64-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
5446 // TCHECK1-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
5447 // TCHECK1-64-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
5448 // TCHECK1-64-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
5449 // TCHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
5450 // TCHECK1-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
5451 // TCHECK1-64-NEXT:    ret void
5452 // TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
5453 // TCHECK1-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
5454 // TCHECK1-64-NEXT:  entry:
5455 // TCHECK1-64-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
5456 // TCHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
5457 // TCHECK1-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
5458 // TCHECK1-64-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
5459 // TCHECK1-64-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
5460 // TCHECK1-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
5461 // TCHECK1-64-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
5462 // TCHECK1-64-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
5463 // TCHECK1-64-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
5464 // TCHECK1-64-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
5465 // TCHECK1-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
5466 // TCHECK1-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
5467 // TCHECK1-64-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
5468 // TCHECK1-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
5469 // TCHECK1-64-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
5470 // TCHECK1-64-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
5471 // TCHECK1-64-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
5472 // TCHECK1-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
5473 // TCHECK1-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
5474 // TCHECK1-64-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
5475 // TCHECK1-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
5476 // TCHECK1-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
5477 // TCHECK1-64-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
5478 // TCHECK1-64-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
5479 // TCHECK1-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
5480 // TCHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
5481 // TCHECK1-64-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
5482 // TCHECK1-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
5483 // TCHECK1-64-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
5484 // TCHECK1-64-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
5485 // TCHECK1-64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
5486 // TCHECK1-64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
5487 // TCHECK1-64-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
5488 // TCHECK1-64-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
5489 // TCHECK1-64-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
5490 // TCHECK1-64-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
5491 // TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
5492 // TCHECK1-64-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
5493 // TCHECK1-64-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
5494 // TCHECK1-64-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
5495 // TCHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
5496 // TCHECK1-64-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
5497 // TCHECK1-64-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
5498 // TCHECK1-64-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
5499 // TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
5500 // TCHECK1-64-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
5501 // TCHECK1-64-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
5502 // TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
5503 // TCHECK1-64-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
5504 // TCHECK1-64-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
5505 // TCHECK1-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
5506 // TCHECK1-64-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
5507 // TCHECK1-64-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
5508 // TCHECK1-64-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
5509 // TCHECK1-64-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
5510 // TCHECK1-64-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
5511 // TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
5512 // TCHECK1-64-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
5513 // TCHECK1-64-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
5514 // TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
5515 // TCHECK1-64-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
5516 // TCHECK1-64-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
5517 // TCHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
5518 // TCHECK1-64-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
5519 // TCHECK1-64-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
5520 // TCHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
5521 // TCHECK1-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
5522 // TCHECK1-64-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
5523 // TCHECK1-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
5524 // TCHECK1-64-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
5525 // TCHECK1-64-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
5526 // TCHECK1-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
5527 // TCHECK1-64-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
5528 // TCHECK1-64-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
5529 // TCHECK1-64-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
5530 // TCHECK1-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
5531 // TCHECK1-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
5532 // TCHECK1-64-NEXT:    store i64 1, i64* [[X]], align 8
5533 // TCHECK1-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
5534 // TCHECK1-64-NEXT:    store i8 1, i8* [[Y]], align 8
5535 // TCHECK1-64-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
5536 // TCHECK1-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
5537 // TCHECK1-64-NEXT:    ret void
5538 // TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
5539 // TCHECK1-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
5540 // TCHECK1-64-NEXT:  entry:
5541 // TCHECK1-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
5542 // TCHECK1-64-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
5543 // TCHECK1-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
5544 // TCHECK1-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
5545 // TCHECK1-64-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
5546 // TCHECK1-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
5547 // TCHECK1-64-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
5548 // TCHECK1-64-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
5549 // TCHECK1-64-NEXT:    [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8
5550 // TCHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0
5551 // TCHECK1-64-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
5552 // TCHECK1-64-NEXT:    [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8
5553 // TCHECK1-64-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0
5554 // TCHECK1-64-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8
5555 // TCHECK1-64-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
5556 // TCHECK1-64-NEXT:    store double [[INC]], double* [[ARRAYIDX1]], align 8
5557 // TCHECK1-64-NEXT:    ret void
5558 // TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
5559 // TCHECK1-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
5560 // TCHECK1-64-NEXT:  entry:
5561 // TCHECK1-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
5562 // TCHECK1-64-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
5563 // TCHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
5564 // TCHECK1-64-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
5565 // TCHECK1-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
5566 // TCHECK1-64-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
5567 // TCHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
5568 // TCHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
5569 // TCHECK1-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
5570 // TCHECK1-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
5571 // TCHECK1-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
5572 // TCHECK1-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
5573 // TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
5574 // TCHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
5575 // TCHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
5576 // TCHECK1-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
5577 // TCHECK1-64-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
5578 // TCHECK1-64-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
5579 // TCHECK1-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
5580 // TCHECK1-64-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
5581 // TCHECK1-64-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
5582 // TCHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
5583 // TCHECK1-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
5584 // TCHECK1-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
5585 // TCHECK1-64-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
5586 // TCHECK1-64-NEXT:    ret void
5587 // TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
5588 // TCHECK1-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
5589 // TCHECK1-64-NEXT:  entry:
5590 // TCHECK1-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
5591 // TCHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
5592 // TCHECK1-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
5593 // TCHECK1-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
5594 // TCHECK1-64-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
5595 // TCHECK1-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
5596 // TCHECK1-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
5597 // TCHECK1-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
5598 // TCHECK1-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
5599 // TCHECK1-64-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
5600 // TCHECK1-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
5601 // TCHECK1-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
5602 // TCHECK1-64-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
5603 // TCHECK1-64-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
5604 // TCHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
5605 // TCHECK1-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
5606 // TCHECK1-64-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
5607 // TCHECK1-64-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
5608 // TCHECK1-64-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
5609 // TCHECK1-64-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
5610 // TCHECK1-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
5611 // TCHECK1-64-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
5612 // TCHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
5613 // TCHECK1-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
5614 // TCHECK1-64-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
5615 // TCHECK1-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
5616 // TCHECK1-64-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
5617 // TCHECK1-64-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
5618 // TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
5619 // TCHECK1-64-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
5620 // TCHECK1-64-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
5621 // TCHECK1-64-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
5622 // TCHECK1-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
5623 // TCHECK1-64-NEXT:    store double [[ADD]], double* [[A]], align 8
5624 // TCHECK1-64-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
5625 // TCHECK1-64-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
5626 // TCHECK1-64-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
5627 // TCHECK1-64-NEXT:    store double [[INC]], double* [[A5]], align 8
5628 // TCHECK1-64-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
5629 // TCHECK1-64-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
5630 // TCHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
5631 // TCHECK1-64-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
5632 // TCHECK1-64-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
5633 // TCHECK1-64-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
5634 // TCHECK1-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
5635 // TCHECK1-64-NEXT:    ret void
5636 // TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
5637 // TCHECK1-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
5638 // TCHECK1-64-NEXT:  entry:
5639 // TCHECK1-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
5640 // TCHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
5641 // TCHECK1-64-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
5642 // TCHECK1-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
5643 // TCHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
5644 // TCHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
5645 // TCHECK1-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
5646 // TCHECK1-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
5647 // TCHECK1-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
5648 // TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
5649 // TCHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
5650 // TCHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
5651 // TCHECK1-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
5652 // TCHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
5653 // TCHECK1-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
5654 // TCHECK1-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
5655 // TCHECK1-64-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
5656 // TCHECK1-64-NEXT:    ret void
5657 // TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
5658 // TCHECK2-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
5659 // TCHECK2-32-NEXT:  entry:
5660 // TCHECK2-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
5661 // TCHECK2-32-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
5662 // TCHECK2-32-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
5663 // TCHECK2-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
5664 // TCHECK2-32-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
5665 // TCHECK2-32-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
5666 // TCHECK2-32-NEXT:    ret void
5667 // TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
5668 // TCHECK2-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
5669 // TCHECK2-32-NEXT:  entry:
5670 // TCHECK2-32-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
5671 // TCHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
5672 // TCHECK2-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
5673 // TCHECK2-32-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
5674 // TCHECK2-32-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
5675 // TCHECK2-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
5676 // TCHECK2-32-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
5677 // TCHECK2-32-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
5678 // TCHECK2-32-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
5679 // TCHECK2-32-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
5680 // TCHECK2-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
5681 // TCHECK2-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
5682 // TCHECK2-32-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
5683 // TCHECK2-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
5684 // TCHECK2-32-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
5685 // TCHECK2-32-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
5686 // TCHECK2-32-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
5687 // TCHECK2-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
5688 // TCHECK2-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
5689 // TCHECK2-32-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
5690 // TCHECK2-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
5691 // TCHECK2-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
5692 // TCHECK2-32-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
5693 // TCHECK2-32-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
5694 // TCHECK2-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
5695 // TCHECK2-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
5696 // TCHECK2-32-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
5697 // TCHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
5698 // TCHECK2-32-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
5699 // TCHECK2-32-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
5700 // TCHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
5701 // TCHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
5702 // TCHECK2-32-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
5703 // TCHECK2-32-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
5704 // TCHECK2-32-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
5705 // TCHECK2-32-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
5706 // TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
5707 // TCHECK2-32-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
5708 // TCHECK2-32-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
5709 // TCHECK2-32-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
5710 // TCHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
5711 // TCHECK2-32-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
5712 // TCHECK2-32-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
5713 // TCHECK2-32-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
5714 // TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
5715 // TCHECK2-32-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
5716 // TCHECK2-32-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
5717 // TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
5718 // TCHECK2-32-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
5719 // TCHECK2-32-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
5720 // TCHECK2-32-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
5721 // TCHECK2-32-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
5722 // TCHECK2-32-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
5723 // TCHECK2-32-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
5724 // TCHECK2-32-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
5725 // TCHECK2-32-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
5726 // TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
5727 // TCHECK2-32-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
5728 // TCHECK2-32-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
5729 // TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
5730 // TCHECK2-32-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
5731 // TCHECK2-32-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
5732 // TCHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
5733 // TCHECK2-32-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
5734 // TCHECK2-32-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
5735 // TCHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
5736 // TCHECK2-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
5737 // TCHECK2-32-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
5738 // TCHECK2-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
5739 // TCHECK2-32-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
5740 // TCHECK2-32-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
5741 // TCHECK2-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
5742 // TCHECK2-32-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
5743 // TCHECK2-32-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
5744 // TCHECK2-32-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
5745 // TCHECK2-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
5746 // TCHECK2-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
5747 // TCHECK2-32-NEXT:    store i64 1, i64* [[X]], align 4
5748 // TCHECK2-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
5749 // TCHECK2-32-NEXT:    store i8 1, i8* [[Y]], align 4
5750 // TCHECK2-32-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
5751 // TCHECK2-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
5752 // TCHECK2-32-NEXT:    ret void
5753 // TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
5754 // TCHECK2-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
5755 // TCHECK2-32-NEXT:  entry:
5756 // TCHECK2-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
5757 // TCHECK2-32-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
5758 // TCHECK2-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
5759 // TCHECK2-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
5760 // TCHECK2-32-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
5761 // TCHECK2-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
5762 // TCHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
5763 // TCHECK2-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
5764 // TCHECK2-32-NEXT:    [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4
5765 // TCHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0
5766 // TCHECK2-32-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
5767 // TCHECK2-32-NEXT:    [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4
5768 // TCHECK2-32-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0
5769 // TCHECK2-32-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4
5770 // TCHECK2-32-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
5771 // TCHECK2-32-NEXT:    store double [[INC]], double* [[ARRAYIDX1]], align 4
5772 // TCHECK2-32-NEXT:    ret void
5773 // TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
5774 // TCHECK2-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
5775 // TCHECK2-32-NEXT:  entry:
5776 // TCHECK2-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
5777 // TCHECK2-32-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
5778 // TCHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
5779 // TCHECK2-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
5780 // TCHECK2-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
5781 // TCHECK2-32-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
5782 // TCHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
5783 // TCHECK2-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
5784 // TCHECK2-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
5785 // TCHECK2-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
5786 // TCHECK2-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
5787 // TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
5788 // TCHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
5789 // TCHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
5790 // TCHECK2-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
5791 // TCHECK2-32-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
5792 // TCHECK2-32-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
5793 // TCHECK2-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
5794 // TCHECK2-32-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
5795 // TCHECK2-32-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
5796 // TCHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
5797 // TCHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
5798 // TCHECK2-32-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
5799 // TCHECK2-32-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
5800 // TCHECK2-32-NEXT:    ret void
5801 // TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
5802 // TCHECK2-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
5803 // TCHECK2-32-NEXT:  entry:
5804 // TCHECK2-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
5805 // TCHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
5806 // TCHECK2-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
5807 // TCHECK2-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
5808 // TCHECK2-32-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
5809 // TCHECK2-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
5810 // TCHECK2-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
5811 // TCHECK2-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
5812 // TCHECK2-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
5813 // TCHECK2-32-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
5814 // TCHECK2-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
5815 // TCHECK2-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
5816 // TCHECK2-32-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
5817 // TCHECK2-32-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
5818 // TCHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
5819 // TCHECK2-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
5820 // TCHECK2-32-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
5821 // TCHECK2-32-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
5822 // TCHECK2-32-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
5823 // TCHECK2-32-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
5824 // TCHECK2-32-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
5825 // TCHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
5826 // TCHECK2-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
5827 // TCHECK2-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
5828 // TCHECK2-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
5829 // TCHECK2-32-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
5830 // TCHECK2-32-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
5831 // TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
5832 // TCHECK2-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
5833 // TCHECK2-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
5834 // TCHECK2-32-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
5835 // TCHECK2-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
5836 // TCHECK2-32-NEXT:    store double [[ADD]], double* [[A]], align 4
5837 // TCHECK2-32-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
5838 // TCHECK2-32-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
5839 // TCHECK2-32-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
5840 // TCHECK2-32-NEXT:    store double [[INC]], double* [[A4]], align 4
5841 // TCHECK2-32-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
5842 // TCHECK2-32-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
5843 // TCHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
5844 // TCHECK2-32-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
5845 // TCHECK2-32-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
5846 // TCHECK2-32-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
5847 // TCHECK2-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
5848 // TCHECK2-32-NEXT:    ret void
5849 // TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
5850 // TCHECK2-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
5851 // TCHECK2-32-NEXT:  entry:
5852 // TCHECK2-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
5853 // TCHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
5854 // TCHECK2-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
5855 // TCHECK2-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
5856 // TCHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
5857 // TCHECK2-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
5858 // TCHECK2-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
5859 // TCHECK2-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
5860 // TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
5861 // TCHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
5862 // TCHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
5863 // TCHECK2-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
5864 // TCHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
5865 // TCHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
5866 // TCHECK2-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
5867 // TCHECK2-32-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
5868 // TCHECK2-32-NEXT:    ret void
5869 // TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
5870 // TCHECK3-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
5871 // TCHECK3-32-NEXT:  entry:
5872 // TCHECK3-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
5873 // TCHECK3-32-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
5874 // TCHECK3-32-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
5875 // TCHECK3-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
5876 // TCHECK3-32-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
5877 // TCHECK3-32-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
5878 // TCHECK3-32-NEXT:    ret void
5879 // TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
5880 // TCHECK3-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
5881 // TCHECK3-32-NEXT:  entry:
5882 // TCHECK3-32-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
5883 // TCHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
5884 // TCHECK3-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
5885 // TCHECK3-32-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
5886 // TCHECK3-32-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
5887 // TCHECK3-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
5888 // TCHECK3-32-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
5889 // TCHECK3-32-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
5890 // TCHECK3-32-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
5891 // TCHECK3-32-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
5892 // TCHECK3-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
5893 // TCHECK3-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
5894 // TCHECK3-32-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
5895 // TCHECK3-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
5896 // TCHECK3-32-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
5897 // TCHECK3-32-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
5898 // TCHECK3-32-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
5899 // TCHECK3-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
5900 // TCHECK3-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
5901 // TCHECK3-32-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
5902 // TCHECK3-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
5903 // TCHECK3-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
5904 // TCHECK3-32-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
5905 // TCHECK3-32-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
5906 // TCHECK3-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
5907 // TCHECK3-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
5908 // TCHECK3-32-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
5909 // TCHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
5910 // TCHECK3-32-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
5911 // TCHECK3-32-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
5912 // TCHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
5913 // TCHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
5914 // TCHECK3-32-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
5915 // TCHECK3-32-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
5916 // TCHECK3-32-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
5917 // TCHECK3-32-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
5918 // TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
5919 // TCHECK3-32-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
5920 // TCHECK3-32-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
5921 // TCHECK3-32-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
5922 // TCHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
5923 // TCHECK3-32-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
5924 // TCHECK3-32-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
5925 // TCHECK3-32-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
5926 // TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
5927 // TCHECK3-32-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
5928 // TCHECK3-32-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
5929 // TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
5930 // TCHECK3-32-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
5931 // TCHECK3-32-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
5932 // TCHECK3-32-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
5933 // TCHECK3-32-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
5934 // TCHECK3-32-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
5935 // TCHECK3-32-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
5936 // TCHECK3-32-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
5937 // TCHECK3-32-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
5938 // TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
5939 // TCHECK3-32-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
5940 // TCHECK3-32-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
5941 // TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
5942 // TCHECK3-32-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
5943 // TCHECK3-32-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
5944 // TCHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
5945 // TCHECK3-32-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
5946 // TCHECK3-32-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
5947 // TCHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
5948 // TCHECK3-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
5949 // TCHECK3-32-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
5950 // TCHECK3-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
5951 // TCHECK3-32-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
5952 // TCHECK3-32-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
5953 // TCHECK3-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
5954 // TCHECK3-32-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
5955 // TCHECK3-32-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
5956 // TCHECK3-32-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
5957 // TCHECK3-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
5958 // TCHECK3-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
5959 // TCHECK3-32-NEXT:    store i64 1, i64* [[X]], align 4
5960 // TCHECK3-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
5961 // TCHECK3-32-NEXT:    store i8 1, i8* [[Y]], align 4
5962 // TCHECK3-32-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
5963 // TCHECK3-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
5964 // TCHECK3-32-NEXT:    ret void
5965 // TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
5966 // TCHECK3-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
5967 // TCHECK3-32-NEXT:  entry:
5968 // TCHECK3-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
5969 // TCHECK3-32-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
5970 // TCHECK3-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
5971 // TCHECK3-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
5972 // TCHECK3-32-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
5973 // TCHECK3-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
5974 // TCHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
5975 // TCHECK3-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
5976 // TCHECK3-32-NEXT:    [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4
5977 // TCHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0
5978 // TCHECK3-32-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
5979 // TCHECK3-32-NEXT:    [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4
5980 // TCHECK3-32-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0
5981 // TCHECK3-32-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4
5982 // TCHECK3-32-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
5983 // TCHECK3-32-NEXT:    store double [[INC]], double* [[ARRAYIDX1]], align 4
5984 // TCHECK3-32-NEXT:    ret void
5985 // TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
5986 // TCHECK3-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
5987 // TCHECK3-32-NEXT:  entry:
5988 // TCHECK3-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
5989 // TCHECK3-32-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
5990 // TCHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
5991 // TCHECK3-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
5992 // TCHECK3-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
5993 // TCHECK3-32-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
5994 // TCHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
5995 // TCHECK3-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
5996 // TCHECK3-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
5997 // TCHECK3-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
5998 // TCHECK3-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
5999 // TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
6000 // TCHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
6001 // TCHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
6002 // TCHECK3-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
6003 // TCHECK3-32-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
6004 // TCHECK3-32-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
6005 // TCHECK3-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
6006 // TCHECK3-32-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
6007 // TCHECK3-32-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
6008 // TCHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
6009 // TCHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
6010 // TCHECK3-32-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
6011 // TCHECK3-32-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
6012 // TCHECK3-32-NEXT:    ret void
6013 // TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
6014 // TCHECK3-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
6015 // TCHECK3-32-NEXT:  entry:
6016 // TCHECK3-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
6017 // TCHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
6018 // TCHECK3-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
6019 // TCHECK3-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
6020 // TCHECK3-32-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
6021 // TCHECK3-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
6022 // TCHECK3-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
6023 // TCHECK3-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
6024 // TCHECK3-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
6025 // TCHECK3-32-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
6026 // TCHECK3-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
6027 // TCHECK3-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
6028 // TCHECK3-32-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
6029 // TCHECK3-32-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
6030 // TCHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
6031 // TCHECK3-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
6032 // TCHECK3-32-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
6033 // TCHECK3-32-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
6034 // TCHECK3-32-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
6035 // TCHECK3-32-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
6036 // TCHECK3-32-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
6037 // TCHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
6038 // TCHECK3-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
6039 // TCHECK3-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
6040 // TCHECK3-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
6041 // TCHECK3-32-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
6042 // TCHECK3-32-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
6043 // TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
6044 // TCHECK3-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
6045 // TCHECK3-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
6046 // TCHECK3-32-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
6047 // TCHECK3-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
6048 // TCHECK3-32-NEXT:    store double [[ADD]], double* [[A]], align 4
6049 // TCHECK3-32-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
6050 // TCHECK3-32-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
6051 // TCHECK3-32-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
6052 // TCHECK3-32-NEXT:    store double [[INC]], double* [[A4]], align 4
6053 // TCHECK3-32-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
6054 // TCHECK3-32-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
6055 // TCHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
6056 // TCHECK3-32-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
6057 // TCHECK3-32-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
6058 // TCHECK3-32-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
6059 // TCHECK3-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
6060 // TCHECK3-32-NEXT:    ret void
6061 // TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
6062 // TCHECK3-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
6063 // TCHECK3-32-NEXT:  entry:
6064 // TCHECK3-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
6065 // TCHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
6066 // TCHECK3-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
6067 // TCHECK3-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
6068 // TCHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
6069 // TCHECK3-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
6070 // TCHECK3-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
6071 // TCHECK3-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
6072 // TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
6073 // TCHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
6074 // TCHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
6075 // TCHECK3-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
6076 // TCHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
6077 // TCHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
6078 // TCHECK3-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
6079 // TCHECK3-32-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
6080 // TCHECK3-32-NEXT:    ret void
6081 // CHECK0-LABEL: define {{[^@]+}}@_Z3fooiPd
6082 // CHECK0-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
6083 // CHECK0-NEXT:  entry:
6084 // CHECK0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
6085 // CHECK0-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
6086 // CHECK0-NEXT:    [[A:%.*]] = alloca i32, align 4
6087 // CHECK0-NEXT:    [[AA:%.*]] = alloca i16, align 2
6088 // CHECK0-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
6089 // CHECK0-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
6090 // CHECK0-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
6091 // CHECK0-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
6092 // CHECK0-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
6093 // CHECK0-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
6094 // CHECK0-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
6095 // CHECK0-NEXT:    [[P:%.*]] = alloca ptr, align 64
6096 // CHECK0-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
6097 // CHECK0-NEXT:    [[GA_CASTED:%.*]] = alloca i64, align 8
6098 // CHECK0-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
6099 // CHECK0-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
6100 // CHECK0-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
6101 // CHECK0-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
6102 // CHECK0-NEXT:    [[AA_CASTED:%.*]] = alloca i64, align 8
6103 // CHECK0-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x ptr], align 8
6104 // CHECK0-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x ptr], align 8
6105 // CHECK0-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x ptr], align 8
6106 // CHECK0-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
6107 // CHECK0-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
6108 // CHECK0-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
6109 // CHECK0-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
6110 // CHECK0-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
6111 // CHECK0-NEXT:    [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
6112 // CHECK0-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
6113 // CHECK0-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
6114 // CHECK0-NEXT:    store i32 0, ptr [[A]], align 4
6115 // CHECK0-NEXT:    store i16 0, ptr [[AA]], align 2
6116 // CHECK0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
6117 // CHECK0-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
6118 // CHECK0-NEXT:    [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
6119 // CHECK0-NEXT:    store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
6120 // CHECK0-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
6121 // CHECK0-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
6122 // CHECK0-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
6123 // CHECK0-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
6124 // CHECK0-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
6125 // CHECK0-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
6126 // CHECK0-NEXT:    store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
6127 // CHECK0-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
6128 // CHECK0-NEXT:    [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
6129 // CHECK0-NEXT:    store i32 [[TMP6]], ptr [[X]], align 4
6130 // CHECK0-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
6131 // CHECK0-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
6132 // CHECK0-NEXT:    store i32 [[TMP7]], ptr [[Y]], align 4
6133 // CHECK0-NEXT:    store ptr [[A]], ptr [[P]], align 64
6134 // CHECK0-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
6135 // CHECK0-NEXT:    store i32 [[TMP8]], ptr [[A_CASTED]], align 4
6136 // CHECK0-NEXT:    [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8
6137 // CHECK0-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[P]], align 64
6138 // CHECK0-NEXT:    [[TMP11:%.*]] = load i32, ptr @ga, align 4
6139 // CHECK0-NEXT:    store i32 [[TMP11]], ptr [[GA_CASTED]], align 4
6140 // CHECK0-NEXT:    [[TMP12:%.*]] = load i64, ptr [[GA_CASTED]], align 8
6141 // CHECK0-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
6142 // CHECK0-NEXT:    store i64 [[TMP9]], ptr [[TMP13]], align 8
6143 // CHECK0-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
6144 // CHECK0-NEXT:    store i64 [[TMP9]], ptr [[TMP14]], align 8
6145 // CHECK0-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
6146 // CHECK0-NEXT:    store ptr null, ptr [[TMP15]], align 8
6147 // CHECK0-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
6148 // CHECK0-NEXT:    store ptr [[TMP10]], ptr [[TMP16]], align 8
6149 // CHECK0-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
6150 // CHECK0-NEXT:    store ptr [[TMP10]], ptr [[TMP17]], align 8
6151 // CHECK0-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
6152 // CHECK0-NEXT:    store ptr null, ptr [[TMP18]], align 8
6153 // CHECK0-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
6154 // CHECK0-NEXT:    store i64 [[TMP12]], ptr [[TMP19]], align 8
6155 // CHECK0-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
6156 // CHECK0-NEXT:    store i64 [[TMP12]], ptr [[TMP20]], align 8
6157 // CHECK0-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
6158 // CHECK0-NEXT:    store ptr null, ptr [[TMP21]], align 8
6159 // CHECK0-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
6160 // CHECK0-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
6161 // CHECK0-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
6162 // CHECK0-NEXT:    store i32 3, ptr [[TMP24]], align 4
6163 // CHECK0-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
6164 // CHECK0-NEXT:    store i32 3, ptr [[TMP25]], align 4
6165 // CHECK0-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
6166 // CHECK0-NEXT:    store ptr [[TMP22]], ptr [[TMP26]], align 8
6167 // CHECK0-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
6168 // CHECK0-NEXT:    store ptr [[TMP23]], ptr [[TMP27]], align 8
6169 // CHECK0-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
6170 // CHECK0-NEXT:    store ptr @.offload_sizes, ptr [[TMP28]], align 8
6171 // CHECK0-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
6172 // CHECK0-NEXT:    store ptr @.offload_maptypes, ptr [[TMP29]], align 8
6173 // CHECK0-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
6174 // CHECK0-NEXT:    store ptr null, ptr [[TMP30]], align 8
6175 // CHECK0-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
6176 // CHECK0-NEXT:    store ptr null, ptr [[TMP31]], align 8
6177 // CHECK0-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
6178 // CHECK0-NEXT:    store i64 0, ptr [[TMP32]], align 8
6179 // CHECK0-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
6180 // CHECK0-NEXT:    store i64 0, ptr [[TMP33]], align 8
6181 // CHECK0-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
6182 // CHECK0-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP34]], align 4
6183 // CHECK0-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
6184 // CHECK0-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP35]], align 4
6185 // CHECK0-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
6186 // CHECK0-NEXT:    store i32 0, ptr [[TMP36]], align 4
6187 // CHECK0-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, ptr [[KERNEL_ARGS]])
6188 // CHECK0-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
6189 // CHECK0-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
6190 // CHECK0:       omp_offload.failed:
6191 // CHECK0-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], ptr [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
6192 // CHECK0-NEXT:    br label [[OMP_OFFLOAD_CONT]]
6193 // CHECK0:       omp_offload.cont:
6194 // CHECK0-NEXT:    [[TMP39:%.*]] = load i16, ptr [[AA]], align 2
6195 // CHECK0-NEXT:    store i16 [[TMP39]], ptr [[AA_CASTED]], align 2
6196 // CHECK0-NEXT:    [[TMP40:%.*]] = load i64, ptr [[AA_CASTED]], align 8
6197 // CHECK0-NEXT:    [[TMP41:%.*]] = mul nuw i64 [[TMP1]], 4
6198 // CHECK0-NEXT:    [[TMP42:%.*]] = mul nuw i64 5, [[TMP4]]
6199 // CHECK0-NEXT:    [[TMP43:%.*]] = mul nuw i64 [[TMP42]], 8
6200 // CHECK0-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.1, i64 72, i1 false)
6201 // CHECK0-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
6202 // CHECK0-NEXT:    store i64 [[TMP40]], ptr [[TMP44]], align 8
6203 // CHECK0-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
6204 // CHECK0-NEXT:    store i64 [[TMP40]], ptr [[TMP45]], align 8
6205 // CHECK0-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0
6206 // CHECK0-NEXT:    store ptr null, ptr [[TMP46]], align 8
6207 // CHECK0-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
6208 // CHECK0-NEXT:    store ptr [[B]], ptr [[TMP47]], align 8
6209 // CHECK0-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
6210 // CHECK0-NEXT:    store ptr [[B]], ptr [[TMP48]], align 8
6211 // CHECK0-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 1
6212 // CHECK0-NEXT:    store ptr null, ptr [[TMP49]], align 8
6213 // CHECK0-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
6214 // CHECK0-NEXT:    store i64 [[TMP1]], ptr [[TMP50]], align 8
6215 // CHECK0-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
6216 // CHECK0-NEXT:    store i64 [[TMP1]], ptr [[TMP51]], align 8
6217 // CHECK0-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 2
6218 // CHECK0-NEXT:    store ptr null, ptr [[TMP52]], align 8
6219 // CHECK0-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
6220 // CHECK0-NEXT:    store ptr [[VLA]], ptr [[TMP53]], align 8
6221 // CHECK0-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
6222 // CHECK0-NEXT:    store ptr [[VLA]], ptr [[TMP54]], align 8
6223 // CHECK0-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3
6224 // CHECK0-NEXT:    store i64 [[TMP41]], ptr [[TMP55]], align 8
6225 // CHECK0-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 3
6226 // CHECK0-NEXT:    store ptr null, ptr [[TMP56]], align 8
6227 // CHECK0-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
6228 // CHECK0-NEXT:    store ptr [[C]], ptr [[TMP57]], align 8
6229 // CHECK0-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
6230 // CHECK0-NEXT:    store ptr [[C]], ptr [[TMP58]], align 8
6231 // CHECK0-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 4
6232 // CHECK0-NEXT:    store ptr null, ptr [[TMP59]], align 8
6233 // CHECK0-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
6234 // CHECK0-NEXT:    store i64 5, ptr [[TMP60]], align 8
6235 // CHECK0-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
6236 // CHECK0-NEXT:    store i64 5, ptr [[TMP61]], align 8
6237 // CHECK0-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 5
6238 // CHECK0-NEXT:    store ptr null, ptr [[TMP62]], align 8
6239 // CHECK0-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
6240 // CHECK0-NEXT:    store i64 [[TMP4]], ptr [[TMP63]], align 8
6241 // CHECK0-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
6242 // CHECK0-NEXT:    store i64 [[TMP4]], ptr [[TMP64]], align 8
6243 // CHECK0-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 6
6244 // CHECK0-NEXT:    store ptr null, ptr [[TMP65]], align 8
6245 // CHECK0-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
6246 // CHECK0-NEXT:    store ptr [[VLA1]], ptr [[TMP66]], align 8
6247 // CHECK0-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
6248 // CHECK0-NEXT:    store ptr [[VLA1]], ptr [[TMP67]], align 8
6249 // CHECK0-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7
6250 // CHECK0-NEXT:    store i64 [[TMP43]], ptr [[TMP68]], align 8
6251 // CHECK0-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 7
6252 // CHECK0-NEXT:    store ptr null, ptr [[TMP69]], align 8
6253 // CHECK0-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
6254 // CHECK0-NEXT:    store ptr [[D]], ptr [[TMP70]], align 8
6255 // CHECK0-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
6256 // CHECK0-NEXT:    store ptr [[D]], ptr [[TMP71]], align 8
6257 // CHECK0-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 8
6258 // CHECK0-NEXT:    store ptr null, ptr [[TMP72]], align 8
6259 // CHECK0-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
6260 // CHECK0-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
6261 // CHECK0-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
6262 // CHECK0-NEXT:    [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
6263 // CHECK0-NEXT:    store i32 3, ptr [[TMP76]], align 4
6264 // CHECK0-NEXT:    [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
6265 // CHECK0-NEXT:    store i32 9, ptr [[TMP77]], align 4
6266 // CHECK0-NEXT:    [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
6267 // CHECK0-NEXT:    store ptr [[TMP73]], ptr [[TMP78]], align 8
6268 // CHECK0-NEXT:    [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
6269 // CHECK0-NEXT:    store ptr [[TMP74]], ptr [[TMP79]], align 8
6270 // CHECK0-NEXT:    [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
6271 // CHECK0-NEXT:    store ptr [[TMP75]], ptr [[TMP80]], align 8
6272 // CHECK0-NEXT:    [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
6273 // CHECK0-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP81]], align 8
6274 // CHECK0-NEXT:    [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
6275 // CHECK0-NEXT:    store ptr null, ptr [[TMP82]], align 8
6276 // CHECK0-NEXT:    [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
6277 // CHECK0-NEXT:    store ptr null, ptr [[TMP83]], align 8
6278 // CHECK0-NEXT:    [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
6279 // CHECK0-NEXT:    store i64 0, ptr [[TMP84]], align 8
6280 // CHECK0-NEXT:    [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
6281 // CHECK0-NEXT:    store i64 0, ptr [[TMP85]], align 8
6282 // CHECK0-NEXT:    [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
6283 // CHECK0-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP86]], align 4
6284 // CHECK0-NEXT:    [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
6285 // CHECK0-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP87]], align 4
6286 // CHECK0-NEXT:    [[TMP88:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
6287 // CHECK0-NEXT:    store i32 0, ptr [[TMP88]], align 4
6288 // CHECK0-NEXT:    [[TMP89:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, ptr [[KERNEL_ARGS5]])
6289 // CHECK0-NEXT:    [[TMP90:%.*]] = icmp ne i32 [[TMP89]], 0
6290 // CHECK0-NEXT:    br i1 [[TMP90]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
6291 // CHECK0:       omp_offload.failed6:
6292 // CHECK0-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP40]], ptr [[B]], i64 [[TMP1]], ptr [[VLA]], ptr [[C]], i64 5, i64 [[TMP4]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]]
6293 // CHECK0-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
6294 // CHECK0:       omp_offload.cont7:
6295 // CHECK0-NEXT:    [[TMP91:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
6296 // CHECK0-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
6297 // CHECK0-NEXT:    store ptr [[TMP91]], ptr [[TMP92]], align 8
6298 // CHECK0-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
6299 // CHECK0-NEXT:    store ptr [[TMP91]], ptr [[TMP93]], align 8
6300 // CHECK0-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 0
6301 // CHECK0-NEXT:    store ptr null, ptr [[TMP94]], align 8
6302 // CHECK0-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
6303 // CHECK0-NEXT:    store ptr [[E]], ptr [[TMP95]], align 8
6304 // CHECK0-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
6305 // CHECK0-NEXT:    store ptr [[E]], ptr [[TMP96]], align 8
6306 // CHECK0-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 1
6307 // CHECK0-NEXT:    store ptr null, ptr [[TMP97]], align 8
6308 // CHECK0-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
6309 // CHECK0-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
6310 // CHECK0-NEXT:    [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0
6311 // CHECK0-NEXT:    store i32 3, ptr [[TMP100]], align 4
6312 // CHECK0-NEXT:    [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1
6313 // CHECK0-NEXT:    store i32 2, ptr [[TMP101]], align 4
6314 // CHECK0-NEXT:    [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2
6315 // CHECK0-NEXT:    store ptr [[TMP98]], ptr [[TMP102]], align 8
6316 // CHECK0-NEXT:    [[TMP103:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 3
6317 // CHECK0-NEXT:    store ptr [[TMP99]], ptr [[TMP103]], align 8
6318 // CHECK0-NEXT:    [[TMP104:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 4
6319 // CHECK0-NEXT:    store ptr @.offload_sizes.3, ptr [[TMP104]], align 8
6320 // CHECK0-NEXT:    [[TMP105:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 5
6321 // CHECK0-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP105]], align 8
6322 // CHECK0-NEXT:    [[TMP106:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 6
6323 // CHECK0-NEXT:    store ptr null, ptr [[TMP106]], align 8
6324 // CHECK0-NEXT:    [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 7
6325 // CHECK0-NEXT:    store ptr null, ptr [[TMP107]], align 8
6326 // CHECK0-NEXT:    [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 8
6327 // CHECK0-NEXT:    store i64 0, ptr [[TMP108]], align 8
6328 // CHECK0-NEXT:    [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 9
6329 // CHECK0-NEXT:    store i64 0, ptr [[TMP109]], align 8
6330 // CHECK0-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 10
6331 // CHECK0-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP110]], align 4
6332 // CHECK0-NEXT:    [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 11
6333 // CHECK0-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP111]], align 4
6334 // CHECK0-NEXT:    [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 12
6335 // CHECK0-NEXT:    store i32 0, ptr [[TMP112]], align 4
6336 // CHECK0-NEXT:    [[TMP113:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, ptr [[KERNEL_ARGS11]])
6337 // CHECK0-NEXT:    [[TMP114:%.*]] = icmp ne i32 [[TMP113]], 0
6338 // CHECK0-NEXT:    br i1 [[TMP114]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
6339 // CHECK0:       omp_offload.failed12:
6340 // CHECK0-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(ptr [[TMP91]], ptr [[E]]) #[[ATTR3]]
6341 // CHECK0-NEXT:    br label [[OMP_OFFLOAD_CONT13]]
6342 // CHECK0:       omp_offload.cont13:
6343 // CHECK0-NEXT:    [[TMP115:%.*]] = load i32, ptr [[A]], align 4
6344 // CHECK0-NEXT:    [[TMP116:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
6345 // CHECK0-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP116]])
6346 // CHECK0-NEXT:    ret i32 [[TMP115]]
6347 //
6348 //
6349 // CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
6350 // CHECK0-SAME: (i64 noundef [[A:%.*]], ptr noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
6351 // CHECK0-NEXT:  entry:
6352 // CHECK0-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
6353 // CHECK0-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
6354 // CHECK0-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
6355 // CHECK0-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
6356 // CHECK0-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
6357 // CHECK0-NEXT:    store i64 [[GA]], ptr [[GA_ADDR]], align 8
6358 // CHECK0-NEXT:    ret void
6359 //
6360 //
6361 // CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
6362 // CHECK0-SAME: (i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
6363 // CHECK0-NEXT:  entry:
6364 // CHECK0-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
6365 // CHECK0-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
6366 // CHECK0-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
6367 // CHECK0-NEXT:    [[BN_ADDR:%.*]] = alloca ptr, align 8
6368 // CHECK0-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
6369 // CHECK0-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
6370 // CHECK0-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
6371 // CHECK0-NEXT:    [[CN_ADDR:%.*]] = alloca ptr, align 8
6372 // CHECK0-NEXT:    [[D_ADDR:%.*]] = alloca ptr, align 8
6373 // CHECK0-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
6374 // CHECK0-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
6375 // CHECK0-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
6376 // CHECK0-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
6377 // CHECK0-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
6378 // CHECK0-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
6379 // CHECK0-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
6380 // CHECK0-NEXT:    store i64 [[AA]], ptr [[AA_ADDR]], align 8
6381 // CHECK0-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
6382 // CHECK0-NEXT:    store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
6383 // CHECK0-NEXT:    store ptr [[BN]], ptr [[BN_ADDR]], align 8
6384 // CHECK0-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
6385 // CHECK0-NEXT:    store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
6386 // CHECK0-NEXT:    store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8
6387 // CHECK0-NEXT:    store ptr [[CN]], ptr [[CN_ADDR]], align 8
6388 // CHECK0-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 8
6389 // CHECK0-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
6390 // CHECK0-NEXT:    [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
6391 // CHECK0-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8
6392 // CHECK0-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
6393 // CHECK0-NEXT:    [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
6394 // CHECK0-NEXT:    [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8
6395 // CHECK0-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8
6396 // CHECK0-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
6397 // CHECK0-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i64 40, i1 false)
6398 // CHECK0-NEXT:    [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
6399 // CHECK0-NEXT:    store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8
6400 // CHECK0-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
6401 // CHECK0-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
6402 // CHECK0-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP1]], 4
6403 // CHECK0-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i64 [[TMP9]], i1 false)
6404 // CHECK0-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i64 400, i1 false)
6405 // CHECK0-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
6406 // CHECK0-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP10]], align 8
6407 // CHECK0-NEXT:    store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
6408 // CHECK0-NEXT:    store i64 [[TMP5]], ptr [[__VLA_EXPR2]], align 8
6409 // CHECK0-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
6410 // CHECK0-NEXT:    [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
6411 // CHECK0-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i64 [[TMP12]], i1 false)
6412 // CHECK0-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[D9]], ptr align 8 [[TMP7]], i64 16, i1 false)
6413 // CHECK0-NEXT:    [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
6414 // CHECK0-NEXT:    [[CONV:%.*]] = sext i16 [[TMP13]] to i32
6415 // CHECK0-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
6416 // CHECK0-NEXT:    [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
6417 // CHECK0-NEXT:    store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
6418 // CHECK0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i64 0, i64 2
6419 // CHECK0-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
6420 // CHECK0-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i64 3
6421 // CHECK0-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
6422 // CHECK0-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i64 0, i64 1
6423 // CHECK0-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i64 0, i64 2
6424 // CHECK0-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
6425 // CHECK0-NEXT:    [[TMP14:%.*]] = mul nsw i64 1, [[TMP5]]
6426 // CHECK0-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i64 [[TMP14]]
6427 // CHECK0-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3
6428 // CHECK0-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
6429 // CHECK0-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
6430 // CHECK0-NEXT:    store i64 1, ptr [[X]], align 8
6431 // CHECK0-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
6432 // CHECK0-NEXT:    store i8 1, ptr [[Y]], align 8
6433 // CHECK0-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
6434 // CHECK0-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP15]])
6435 // CHECK0-NEXT:    ret void
6436 //
6437 //
6438 // CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
6439 // CHECK0-SAME: (ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
6440 // CHECK0-NEXT:  entry:
6441 // CHECK0-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
6442 // CHECK0-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
6443 // CHECK0-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
6444 // CHECK0-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
6445 // CHECK0-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
6446 // CHECK0-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
6447 // CHECK0-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[E1]], ptr align 4 [[TMP0]], i64 8, i1 false)
6448 // CHECK0-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E1]], i32 0, i32 0
6449 // CHECK0-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
6450 // CHECK0-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
6451 // CHECK0-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
6452 // CHECK0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 0
6453 // CHECK0-NEXT:    store double [[CONV]], ptr [[ARRAYIDX]], align 8
6454 // CHECK0-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
6455 // CHECK0-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0
6456 // CHECK0-NEXT:    [[TMP4:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
6457 // CHECK0-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
6458 // CHECK0-NEXT:    store double [[INC]], ptr [[ARRAYIDX2]], align 8
6459 // CHECK0-NEXT:    ret void
6460 //
6461 //
6462 // CHECK0-LABEL: define {{[^@]+}}@_Z3bariPd
6463 // CHECK0-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
6464 // CHECK0-NEXT:  entry:
6465 // CHECK0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
6466 // CHECK0-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
6467 // CHECK0-NEXT:    [[A:%.*]] = alloca i32, align 4
6468 // CHECK0-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
6469 // CHECK0-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
6470 // CHECK0-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
6471 // CHECK0-NEXT:    store i32 0, ptr [[A]], align 4
6472 // CHECK0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
6473 // CHECK0-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
6474 // CHECK0-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], ptr noundef [[TMP1]])
6475 // CHECK0-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
6476 // CHECK0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
6477 // CHECK0-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
6478 // CHECK0-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
6479 // CHECK0-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
6480 // CHECK0-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
6481 // CHECK0-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
6482 // CHECK0-NEXT:    store i32 [[ADD2]], ptr [[A]], align 4
6483 // CHECK0-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
6484 // CHECK0-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
6485 // CHECK0-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
6486 // CHECK0-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
6487 // CHECK0-NEXT:    store i32 [[ADD4]], ptr [[A]], align 4
6488 // CHECK0-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
6489 // CHECK0-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
6490 // CHECK0-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
6491 // CHECK0-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
6492 // CHECK0-NEXT:    store i32 [[ADD6]], ptr [[A]], align 4
6493 // CHECK0-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
6494 // CHECK0-NEXT:    ret i32 [[TMP9]]
6495 //
6496 //
6497 // CHECK0-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
6498 // CHECK0-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
6499 // CHECK0-NEXT:  entry:
6500 // CHECK0-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
6501 // CHECK0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
6502 // CHECK0-NEXT:    [[B:%.*]] = alloca i32, align 4
6503 // CHECK0-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
6504 // CHECK0-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
6505 // CHECK0-NEXT:    [[B_CASTED:%.*]] = alloca i64, align 8
6506 // CHECK0-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 8
6507 // CHECK0-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
6508 // CHECK0-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
6509 // CHECK0-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
6510 // CHECK0-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
6511 // CHECK0-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
6512 // CHECK0-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
6513 // CHECK0-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
6514 // CHECK0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
6515 // CHECK0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
6516 // CHECK0-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
6517 // CHECK0-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
6518 // CHECK0-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
6519 // CHECK0-NEXT:    [[TMP3:%.*]] = call ptr @llvm.stacksave.p0()
6520 // CHECK0-NEXT:    store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8
6521 // CHECK0-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
6522 // CHECK0-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
6523 // CHECK0-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
6524 // CHECK0-NEXT:    [[TMP5:%.*]] = load i32, ptr [[B]], align 4
6525 // CHECK0-NEXT:    store i32 [[TMP5]], ptr [[B_CASTED]], align 4
6526 // CHECK0-NEXT:    [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8
6527 // CHECK0-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
6528 // CHECK0-NEXT:    [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
6529 // CHECK0-NEXT:    [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
6530 // CHECK0-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.5, i64 40, i1 false)
6531 // CHECK0-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
6532 // CHECK0-NEXT:    store ptr [[THIS1]], ptr [[TMP9]], align 8
6533 // CHECK0-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
6534 // CHECK0-NEXT:    store ptr [[A]], ptr [[TMP10]], align 8
6535 // CHECK0-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
6536 // CHECK0-NEXT:    store ptr null, ptr [[TMP11]], align 8
6537 // CHECK0-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
6538 // CHECK0-NEXT:    store i64 [[TMP6]], ptr [[TMP12]], align 8
6539 // CHECK0-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
6540 // CHECK0-NEXT:    store i64 [[TMP6]], ptr [[TMP13]], align 8
6541 // CHECK0-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
6542 // CHECK0-NEXT:    store ptr null, ptr [[TMP14]], align 8
6543 // CHECK0-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
6544 // CHECK0-NEXT:    store i64 2, ptr [[TMP15]], align 8
6545 // CHECK0-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
6546 // CHECK0-NEXT:    store i64 2, ptr [[TMP16]], align 8
6547 // CHECK0-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
6548 // CHECK0-NEXT:    store ptr null, ptr [[TMP17]], align 8
6549 // CHECK0-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
6550 // CHECK0-NEXT:    store i64 [[TMP2]], ptr [[TMP18]], align 8
6551 // CHECK0-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
6552 // CHECK0-NEXT:    store i64 [[TMP2]], ptr [[TMP19]], align 8
6553 // CHECK0-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
6554 // CHECK0-NEXT:    store ptr null, ptr [[TMP20]], align 8
6555 // CHECK0-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
6556 // CHECK0-NEXT:    store ptr [[VLA]], ptr [[TMP21]], align 8
6557 // CHECK0-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
6558 // CHECK0-NEXT:    store ptr [[VLA]], ptr [[TMP22]], align 8
6559 // CHECK0-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
6560 // CHECK0-NEXT:    store i64 [[TMP8]], ptr [[TMP23]], align 8
6561 // CHECK0-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
6562 // CHECK0-NEXT:    store ptr null, ptr [[TMP24]], align 8
6563 // CHECK0-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
6564 // CHECK0-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
6565 // CHECK0-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
6566 // CHECK0-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
6567 // CHECK0-NEXT:    store i32 3, ptr [[TMP28]], align 4
6568 // CHECK0-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
6569 // CHECK0-NEXT:    store i32 5, ptr [[TMP29]], align 4
6570 // CHECK0-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
6571 // CHECK0-NEXT:    store ptr [[TMP25]], ptr [[TMP30]], align 8
6572 // CHECK0-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
6573 // CHECK0-NEXT:    store ptr [[TMP26]], ptr [[TMP31]], align 8
6574 // CHECK0-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
6575 // CHECK0-NEXT:    store ptr [[TMP27]], ptr [[TMP32]], align 8
6576 // CHECK0-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
6577 // CHECK0-NEXT:    store ptr @.offload_maptypes.6, ptr [[TMP33]], align 8
6578 // CHECK0-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
6579 // CHECK0-NEXT:    store ptr null, ptr [[TMP34]], align 8
6580 // CHECK0-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
6581 // CHECK0-NEXT:    store ptr null, ptr [[TMP35]], align 8
6582 // CHECK0-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
6583 // CHECK0-NEXT:    store i64 0, ptr [[TMP36]], align 8
6584 // CHECK0-NEXT:    [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
6585 // CHECK0-NEXT:    store i64 0, ptr [[TMP37]], align 8
6586 // CHECK0-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
6587 // CHECK0-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP38]], align 4
6588 // CHECK0-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
6589 // CHECK0-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP39]], align 4
6590 // CHECK0-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
6591 // CHECK0-NEXT:    store i32 0, ptr [[TMP40]], align 4
6592 // CHECK0-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, ptr [[KERNEL_ARGS]])
6593 // CHECK0-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
6594 // CHECK0-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
6595 // CHECK0:       omp_offload.failed:
6596 // CHECK0-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(ptr [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], ptr [[VLA]]) #[[ATTR3]]
6597 // CHECK0-NEXT:    br label [[OMP_OFFLOAD_CONT]]
6598 // CHECK0:       omp_offload.cont:
6599 // CHECK0-NEXT:    [[TMP43:%.*]] = mul nsw i64 1, [[TMP2]]
6600 // CHECK0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP43]]
6601 // CHECK0-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
6602 // CHECK0-NEXT:    [[TMP44:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
6603 // CHECK0-NEXT:    [[CONV:%.*]] = sext i16 [[TMP44]] to i32
6604 // CHECK0-NEXT:    [[TMP45:%.*]] = load i32, ptr [[B]], align 4
6605 // CHECK0-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP45]]
6606 // CHECK0-NEXT:    [[TMP46:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
6607 // CHECK0-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP46]])
6608 // CHECK0-NEXT:    ret i32 [[ADD3]]
6609 //
6610 //
6611 // CHECK0-LABEL: define {{[^@]+}}@_ZL7fstatici
6612 // CHECK0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
6613 // CHECK0-NEXT:  entry:
6614 // CHECK0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
6615 // CHECK0-NEXT:    [[A:%.*]] = alloca i32, align 4
6616 // CHECK0-NEXT:    [[AAA:%.*]] = alloca i8, align 1
6617 // CHECK0-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
6618 // CHECK0-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
6619 // CHECK0-NEXT:    [[AAA_CASTED:%.*]] = alloca i64, align 8
6620 // CHECK0-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
6621 // CHECK0-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
6622 // CHECK0-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
6623 // CHECK0-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
6624 // CHECK0-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
6625 // CHECK0-NEXT:    store i32 0, ptr [[A]], align 4
6626 // CHECK0-NEXT:    store i8 0, ptr [[AAA]], align 1
6627 // CHECK0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
6628 // CHECK0-NEXT:    store i32 [[TMP0]], ptr [[A_CASTED]], align 4
6629 // CHECK0-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
6630 // CHECK0-NEXT:    [[TMP2:%.*]] = load i8, ptr [[AAA]], align 1
6631 // CHECK0-NEXT:    store i8 [[TMP2]], ptr [[AAA_CASTED]], align 1
6632 // CHECK0-NEXT:    [[TMP3:%.*]] = load i64, ptr [[AAA_CASTED]], align 8
6633 // CHECK0-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
6634 // CHECK0-NEXT:    store i64 [[TMP1]], ptr [[TMP4]], align 8
6635 // CHECK0-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
6636 // CHECK0-NEXT:    store i64 [[TMP1]], ptr [[TMP5]], align 8
6637 // CHECK0-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
6638 // CHECK0-NEXT:    store ptr null, ptr [[TMP6]], align 8
6639 // CHECK0-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
6640 // CHECK0-NEXT:    store i64 [[TMP3]], ptr [[TMP7]], align 8
6641 // CHECK0-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
6642 // CHECK0-NEXT:    store i64 [[TMP3]], ptr [[TMP8]], align 8
6643 // CHECK0-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
6644 // CHECK0-NEXT:    store ptr null, ptr [[TMP9]], align 8
6645 // CHECK0-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
6646 // CHECK0-NEXT:    store ptr [[B]], ptr [[TMP10]], align 8
6647 // CHECK0-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
6648 // CHECK0-NEXT:    store ptr [[B]], ptr [[TMP11]], align 8
6649 // CHECK0-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
6650 // CHECK0-NEXT:    store ptr null, ptr [[TMP12]], align 8
6651 // CHECK0-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
6652 // CHECK0-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
6653 // CHECK0-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
6654 // CHECK0-NEXT:    store i32 3, ptr [[TMP15]], align 4
6655 // CHECK0-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
6656 // CHECK0-NEXT:    store i32 3, ptr [[TMP16]], align 4
6657 // CHECK0-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
6658 // CHECK0-NEXT:    store ptr [[TMP13]], ptr [[TMP17]], align 8
6659 // CHECK0-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
6660 // CHECK0-NEXT:    store ptr [[TMP14]], ptr [[TMP18]], align 8
6661 // CHECK0-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
6662 // CHECK0-NEXT:    store ptr @.offload_sizes.7, ptr [[TMP19]], align 8
6663 // CHECK0-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
6664 // CHECK0-NEXT:    store ptr @.offload_maptypes.8, ptr [[TMP20]], align 8
6665 // CHECK0-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
6666 // CHECK0-NEXT:    store ptr null, ptr [[TMP21]], align 8
6667 // CHECK0-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
6668 // CHECK0-NEXT:    store ptr null, ptr [[TMP22]], align 8
6669 // CHECK0-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
6670 // CHECK0-NEXT:    store i64 0, ptr [[TMP23]], align 8
6671 // CHECK0-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
6672 // CHECK0-NEXT:    store i64 0, ptr [[TMP24]], align 8
6673 // CHECK0-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
6674 // CHECK0-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP25]], align 4
6675 // CHECK0-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
6676 // CHECK0-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4
6677 // CHECK0-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
6678 // CHECK0-NEXT:    store i32 0, ptr [[TMP27]], align 4
6679 // CHECK0-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, ptr [[KERNEL_ARGS]])
6680 // CHECK0-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
6681 // CHECK0-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
6682 // CHECK0:       omp_offload.failed:
6683 // CHECK0-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], ptr [[B]]) #[[ATTR3]]
6684 // CHECK0-NEXT:    br label [[OMP_OFFLOAD_CONT]]
6685 // CHECK0:       omp_offload.cont:
6686 // CHECK0-NEXT:    [[TMP30:%.*]] = load i32, ptr [[A]], align 4
6687 // CHECK0-NEXT:    ret i32 [[TMP30]]
6688 //
6689 //
6690 // CHECK0-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
6691 // CHECK0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
6692 // CHECK0-NEXT:  entry:
6693 // CHECK0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
6694 // CHECK0-NEXT:    [[A:%.*]] = alloca i32, align 4
6695 // CHECK0-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
6696 // CHECK0-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
6697 // CHECK0-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
6698 // CHECK0-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
6699 // CHECK0-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
6700 // CHECK0-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
6701 // CHECK0-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
6702 // CHECK0-NEXT:    store i32 0, ptr [[A]], align 4
6703 // CHECK0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
6704 // CHECK0-NEXT:    store i32 [[TMP0]], ptr [[A_CASTED]], align 4
6705 // CHECK0-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
6706 // CHECK0-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
6707 // CHECK0-NEXT:    store i64 [[TMP1]], ptr [[TMP2]], align 8
6708 // CHECK0-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
6709 // CHECK0-NEXT:    store i64 [[TMP1]], ptr [[TMP3]], align 8
6710 // CHECK0-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
6711 // CHECK0-NEXT:    store ptr null, ptr [[TMP4]], align 8
6712 // CHECK0-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
6713 // CHECK0-NEXT:    store ptr [[B]], ptr [[TMP5]], align 8
6714 // CHECK0-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
6715 // CHECK0-NEXT:    store ptr [[B]], ptr [[TMP6]], align 8
6716 // CHECK0-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
6717 // CHECK0-NEXT:    store ptr null, ptr [[TMP7]], align 8
6718 // CHECK0-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
6719 // CHECK0-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
6720 // CHECK0-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
6721 // CHECK0-NEXT:    store i32 3, ptr [[TMP10]], align 4
6722 // CHECK0-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
6723 // CHECK0-NEXT:    store i32 2, ptr [[TMP11]], align 4
6724 // CHECK0-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
6725 // CHECK0-NEXT:    store ptr [[TMP8]], ptr [[TMP12]], align 8
6726 // CHECK0-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
6727 // CHECK0-NEXT:    store ptr [[TMP9]], ptr [[TMP13]], align 8
6728 // CHECK0-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
6729 // CHECK0-NEXT:    store ptr @.offload_sizes.9, ptr [[TMP14]], align 8
6730 // CHECK0-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
6731 // CHECK0-NEXT:    store ptr @.offload_maptypes.10, ptr [[TMP15]], align 8
6732 // CHECK0-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
6733 // CHECK0-NEXT:    store ptr null, ptr [[TMP16]], align 8
6734 // CHECK0-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
6735 // CHECK0-NEXT:    store ptr null, ptr [[TMP17]], align 8
6736 // CHECK0-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
6737 // CHECK0-NEXT:    store i64 0, ptr [[TMP18]], align 8
6738 // CHECK0-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
6739 // CHECK0-NEXT:    store i64 0, ptr [[TMP19]], align 8
6740 // CHECK0-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
6741 // CHECK0-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP20]], align 4
6742 // CHECK0-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
6743 // CHECK0-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
6744 // CHECK0-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
6745 // CHECK0-NEXT:    store i32 0, ptr [[TMP22]], align 4
6746 // CHECK0-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, ptr [[KERNEL_ARGS]])
6747 // CHECK0-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
6748 // CHECK0-NEXT:    br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
6749 // CHECK0:       omp_offload.failed:
6750 // CHECK0-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], ptr [[B]]) #[[ATTR3]]
6751 // CHECK0-NEXT:    br label [[OMP_OFFLOAD_CONT]]
6752 // CHECK0:       omp_offload.cont:
6753 // CHECK0-NEXT:    [[TMP25:%.*]] = load i32, ptr [[A]], align 4
6754 // CHECK0-NEXT:    ret i32 [[TMP25]]
6755 //
6756 //
6757 // CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
6758 // CHECK0-SAME: (ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
6759 // CHECK0-NEXT:  entry:
6760 // CHECK0-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
6761 // CHECK0-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
6762 // CHECK0-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
6763 // CHECK0-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
6764 // CHECK0-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
6765 // CHECK0-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
6766 // CHECK0-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
6767 // CHECK0-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
6768 // CHECK0-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
6769 // CHECK0-NEXT:    store i64 [[B]], ptr [[B_ADDR]], align 8
6770 // CHECK0-NEXT:    store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
6771 // CHECK0-NEXT:    store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
6772 // CHECK0-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
6773 // CHECK0-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
6774 // CHECK0-NEXT:    [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
6775 // CHECK0-NEXT:    [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
6776 // CHECK0-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
6777 // CHECK0-NEXT:    [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
6778 // CHECK0-NEXT:    store ptr [[TMP4]], ptr [[SAVED_STACK]], align 8
6779 // CHECK0-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
6780 // CHECK0-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
6781 // CHECK0-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
6782 // CHECK0-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR1]], align 8
6783 // CHECK0-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
6784 // CHECK0-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
6785 // CHECK0-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i64 [[TMP7]], i1 false)
6786 // CHECK0-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
6787 // CHECK0-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
6788 // CHECK0-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
6789 // CHECK0-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
6790 // CHECK0-NEXT:    store double [[ADD]], ptr [[A]], align 8
6791 // CHECK0-NEXT:    [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
6792 // CHECK0-NEXT:    [[TMP9:%.*]] = load double, ptr [[A4]], align 8
6793 // CHECK0-NEXT:    [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
6794 // CHECK0-NEXT:    store double [[INC]], ptr [[A4]], align 8
6795 // CHECK0-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
6796 // CHECK0-NEXT:    [[TMP10:%.*]] = mul nsw i64 1, [[TMP2]]
6797 // CHECK0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i64 [[TMP10]]
6798 // CHECK0-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
6799 // CHECK0-NEXT:    store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
6800 // CHECK0-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
6801 // CHECK0-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP11]])
6802 // CHECK0-NEXT:    ret void
6803 //
6804 //
6805 // CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
6806 // CHECK0-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
6807 // CHECK0-NEXT:  entry:
6808 // CHECK0-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
6809 // CHECK0-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
6810 // CHECK0-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
6811 // CHECK0-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
6812 // CHECK0-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
6813 // CHECK0-NEXT:    store i64 [[AAA]], ptr [[AAA_ADDR]], align 8
6814 // CHECK0-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
6815 // CHECK0-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
6816 // CHECK0-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
6817 // CHECK0-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
6818 // CHECK0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
6819 // CHECK0-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
6820 // CHECK0-NEXT:    [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
6821 // CHECK0-NEXT:    [[CONV:%.*]] = sext i8 [[TMP2]] to i32
6822 // CHECK0-NEXT:    [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
6823 // CHECK0-NEXT:    [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
6824 // CHECK0-NEXT:    store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
6825 // CHECK0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
6826 // CHECK0-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
6827 // CHECK0-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
6828 // CHECK0-NEXT:    store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
6829 // CHECK0-NEXT:    ret void
6830 //
6831 //
6832 // CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
6833 // CHECK0-SAME: (i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
6834 // CHECK0-NEXT:  entry:
6835 // CHECK0-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
6836 // CHECK0-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
6837 // CHECK0-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
6838 // CHECK0-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
6839 // CHECK0-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
6840 // CHECK0-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
6841 // CHECK0-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
6842 // CHECK0-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
6843 // CHECK0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
6844 // CHECK0-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
6845 // CHECK0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
6846 // CHECK0-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
6847 // CHECK0-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
6848 // CHECK0-NEXT:    store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
6849 // CHECK0-NEXT:    ret void
6850 //
6851 //
6852 // CHECK1-LABEL: define {{[^@]+}}@_Z3fooiPd
6853 // CHECK1-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
6854 // CHECK1-NEXT:  entry:
6855 // CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
6856 // CHECK1-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
6857 // CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
6858 // CHECK1-NEXT:    [[AA:%.*]] = alloca i16, align 2
6859 // CHECK1-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
6860 // CHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
6861 // CHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
6862 // CHECK1-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
6863 // CHECK1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
6864 // CHECK1-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
6865 // CHECK1-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
6866 // CHECK1-NEXT:    [[P:%.*]] = alloca ptr, align 64
6867 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
6868 // CHECK1-NEXT:    [[GA_CASTED:%.*]] = alloca i64, align 8
6869 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
6870 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
6871 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
6872 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
6873 // CHECK1-NEXT:    [[AA_CASTED:%.*]] = alloca i64, align 8
6874 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x ptr], align 8
6875 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x ptr], align 8
6876 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x ptr], align 8
6877 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
6878 // CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
6879 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
6880 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
6881 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
6882 // CHECK1-NEXT:    [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
6883 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
6884 // CHECK1-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
6885 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
6886 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
6887 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
6888 // CHECK1-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
6889 // CHECK1-NEXT:    [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
6890 // CHECK1-NEXT:    store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
6891 // CHECK1-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
6892 // CHECK1-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
6893 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
6894 // CHECK1-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
6895 // CHECK1-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
6896 // CHECK1-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
6897 // CHECK1-NEXT:    store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
6898 // CHECK1-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
6899 // CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
6900 // CHECK1-NEXT:    store i32 [[TMP6]], ptr [[X]], align 4
6901 // CHECK1-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
6902 // CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
6903 // CHECK1-NEXT:    store i32 [[TMP7]], ptr [[Y]], align 4
6904 // CHECK1-NEXT:    store ptr [[A]], ptr [[P]], align 64
6905 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
6906 // CHECK1-NEXT:    store i32 [[TMP8]], ptr [[A_CASTED]], align 4
6907 // CHECK1-NEXT:    [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8
6908 // CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[P]], align 64
6909 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr @ga, align 4
6910 // CHECK1-NEXT:    store i32 [[TMP11]], ptr [[GA_CASTED]], align 4
6911 // CHECK1-NEXT:    [[TMP12:%.*]] = load i64, ptr [[GA_CASTED]], align 8
6912 // CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
6913 // CHECK1-NEXT:    store i64 [[TMP9]], ptr [[TMP13]], align 8
6914 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
6915 // CHECK1-NEXT:    store i64 [[TMP9]], ptr [[TMP14]], align 8
6916 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
6917 // CHECK1-NEXT:    store ptr null, ptr [[TMP15]], align 8
6918 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
6919 // CHECK1-NEXT:    store ptr [[TMP10]], ptr [[TMP16]], align 8
6920 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
6921 // CHECK1-NEXT:    store ptr [[TMP10]], ptr [[TMP17]], align 8
6922 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
6923 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
6924 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
6925 // CHECK1-NEXT:    store i64 [[TMP12]], ptr [[TMP19]], align 8
6926 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
6927 // CHECK1-NEXT:    store i64 [[TMP12]], ptr [[TMP20]], align 8
6928 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
6929 // CHECK1-NEXT:    store ptr null, ptr [[TMP21]], align 8
6930 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
6931 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
6932 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
6933 // CHECK1-NEXT:    store i32 3, ptr [[TMP24]], align 4
6934 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
6935 // CHECK1-NEXT:    store i32 3, ptr [[TMP25]], align 4
6936 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
6937 // CHECK1-NEXT:    store ptr [[TMP22]], ptr [[TMP26]], align 8
6938 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
6939 // CHECK1-NEXT:    store ptr [[TMP23]], ptr [[TMP27]], align 8
6940 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
6941 // CHECK1-NEXT:    store ptr @.offload_sizes, ptr [[TMP28]], align 8
6942 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
6943 // CHECK1-NEXT:    store ptr @.offload_maptypes, ptr [[TMP29]], align 8
6944 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
6945 // CHECK1-NEXT:    store ptr null, ptr [[TMP30]], align 8
6946 // CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
6947 // CHECK1-NEXT:    store ptr null, ptr [[TMP31]], align 8
6948 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
6949 // CHECK1-NEXT:    store i64 0, ptr [[TMP32]], align 8
6950 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
6951 // CHECK1-NEXT:    store i64 0, ptr [[TMP33]], align 8
6952 // CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
6953 // CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP34]], align 4
6954 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
6955 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP35]], align 4
6956 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
6957 // CHECK1-NEXT:    store i32 0, ptr [[TMP36]], align 4
6958 // CHECK1-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, ptr [[KERNEL_ARGS]])
6959 // CHECK1-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
6960 // CHECK1-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
6961 // CHECK1:       omp_offload.failed:
6962 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], ptr [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
6963 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
6964 // CHECK1:       omp_offload.cont:
6965 // CHECK1-NEXT:    [[TMP39:%.*]] = load i16, ptr [[AA]], align 2
6966 // CHECK1-NEXT:    store i16 [[TMP39]], ptr [[AA_CASTED]], align 2
6967 // CHECK1-NEXT:    [[TMP40:%.*]] = load i64, ptr [[AA_CASTED]], align 8
6968 // CHECK1-NEXT:    [[TMP41:%.*]] = mul nuw i64 [[TMP1]], 4
6969 // CHECK1-NEXT:    [[TMP42:%.*]] = mul nuw i64 5, [[TMP4]]
6970 // CHECK1-NEXT:    [[TMP43:%.*]] = mul nuw i64 [[TMP42]], 8
6971 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.1, i64 72, i1 false)
6972 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
6973 // CHECK1-NEXT:    store i64 [[TMP40]], ptr [[TMP44]], align 8
6974 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
6975 // CHECK1-NEXT:    store i64 [[TMP40]], ptr [[TMP45]], align 8
6976 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0
6977 // CHECK1-NEXT:    store ptr null, ptr [[TMP46]], align 8
6978 // CHECK1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
6979 // CHECK1-NEXT:    store ptr [[B]], ptr [[TMP47]], align 8
6980 // CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
6981 // CHECK1-NEXT:    store ptr [[B]], ptr [[TMP48]], align 8
6982 // CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 1
6983 // CHECK1-NEXT:    store ptr null, ptr [[TMP49]], align 8
6984 // CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
6985 // CHECK1-NEXT:    store i64 [[TMP1]], ptr [[TMP50]], align 8
6986 // CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
6987 // CHECK1-NEXT:    store i64 [[TMP1]], ptr [[TMP51]], align 8
6988 // CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 2
6989 // CHECK1-NEXT:    store ptr null, ptr [[TMP52]], align 8
6990 // CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
6991 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP53]], align 8
6992 // CHECK1-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
6993 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP54]], align 8
6994 // CHECK1-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3
6995 // CHECK1-NEXT:    store i64 [[TMP41]], ptr [[TMP55]], align 8
6996 // CHECK1-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 3
6997 // CHECK1-NEXT:    store ptr null, ptr [[TMP56]], align 8
6998 // CHECK1-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
6999 // CHECK1-NEXT:    store ptr [[C]], ptr [[TMP57]], align 8
7000 // CHECK1-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
7001 // CHECK1-NEXT:    store ptr [[C]], ptr [[TMP58]], align 8
7002 // CHECK1-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 4
7003 // CHECK1-NEXT:    store ptr null, ptr [[TMP59]], align 8
7004 // CHECK1-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
7005 // CHECK1-NEXT:    store i64 5, ptr [[TMP60]], align 8
7006 // CHECK1-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
7007 // CHECK1-NEXT:    store i64 5, ptr [[TMP61]], align 8
7008 // CHECK1-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 5
7009 // CHECK1-NEXT:    store ptr null, ptr [[TMP62]], align 8
7010 // CHECK1-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
7011 // CHECK1-NEXT:    store i64 [[TMP4]], ptr [[TMP63]], align 8
7012 // CHECK1-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
7013 // CHECK1-NEXT:    store i64 [[TMP4]], ptr [[TMP64]], align 8
7014 // CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 6
7015 // CHECK1-NEXT:    store ptr null, ptr [[TMP65]], align 8
7016 // CHECK1-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
7017 // CHECK1-NEXT:    store ptr [[VLA1]], ptr [[TMP66]], align 8
7018 // CHECK1-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
7019 // CHECK1-NEXT:    store ptr [[VLA1]], ptr [[TMP67]], align 8
7020 // CHECK1-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7
7021 // CHECK1-NEXT:    store i64 [[TMP43]], ptr [[TMP68]], align 8
7022 // CHECK1-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 7
7023 // CHECK1-NEXT:    store ptr null, ptr [[TMP69]], align 8
7024 // CHECK1-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
7025 // CHECK1-NEXT:    store ptr [[D]], ptr [[TMP70]], align 8
7026 // CHECK1-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
7027 // CHECK1-NEXT:    store ptr [[D]], ptr [[TMP71]], align 8
7028 // CHECK1-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 8
7029 // CHECK1-NEXT:    store ptr null, ptr [[TMP72]], align 8
7030 // CHECK1-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
7031 // CHECK1-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
7032 // CHECK1-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
7033 // CHECK1-NEXT:    [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
7034 // CHECK1-NEXT:    store i32 3, ptr [[TMP76]], align 4
7035 // CHECK1-NEXT:    [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
7036 // CHECK1-NEXT:    store i32 9, ptr [[TMP77]], align 4
7037 // CHECK1-NEXT:    [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
7038 // CHECK1-NEXT:    store ptr [[TMP73]], ptr [[TMP78]], align 8
7039 // CHECK1-NEXT:    [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
7040 // CHECK1-NEXT:    store ptr [[TMP74]], ptr [[TMP79]], align 8
7041 // CHECK1-NEXT:    [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
7042 // CHECK1-NEXT:    store ptr [[TMP75]], ptr [[TMP80]], align 8
7043 // CHECK1-NEXT:    [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
7044 // CHECK1-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP81]], align 8
7045 // CHECK1-NEXT:    [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
7046 // CHECK1-NEXT:    store ptr null, ptr [[TMP82]], align 8
7047 // CHECK1-NEXT:    [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
7048 // CHECK1-NEXT:    store ptr null, ptr [[TMP83]], align 8
7049 // CHECK1-NEXT:    [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
7050 // CHECK1-NEXT:    store i64 0, ptr [[TMP84]], align 8
7051 // CHECK1-NEXT:    [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
7052 // CHECK1-NEXT:    store i64 0, ptr [[TMP85]], align 8
7053 // CHECK1-NEXT:    [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
7054 // CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP86]], align 4
7055 // CHECK1-NEXT:    [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
7056 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP87]], align 4
7057 // CHECK1-NEXT:    [[TMP88:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
7058 // CHECK1-NEXT:    store i32 0, ptr [[TMP88]], align 4
7059 // CHECK1-NEXT:    [[TMP89:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, ptr [[KERNEL_ARGS5]])
7060 // CHECK1-NEXT:    [[TMP90:%.*]] = icmp ne i32 [[TMP89]], 0
7061 // CHECK1-NEXT:    br i1 [[TMP90]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
7062 // CHECK1:       omp_offload.failed6:
7063 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP40]], ptr [[B]], i64 [[TMP1]], ptr [[VLA]], ptr [[C]], i64 5, i64 [[TMP4]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]]
7064 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
7065 // CHECK1:       omp_offload.cont7:
7066 // CHECK1-NEXT:    [[TMP91:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
7067 // CHECK1-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
7068 // CHECK1-NEXT:    store ptr [[TMP91]], ptr [[TMP92]], align 8
7069 // CHECK1-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
7070 // CHECK1-NEXT:    store ptr [[TMP91]], ptr [[TMP93]], align 8
7071 // CHECK1-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 0
7072 // CHECK1-NEXT:    store ptr null, ptr [[TMP94]], align 8
7073 // CHECK1-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
7074 // CHECK1-NEXT:    store ptr [[E]], ptr [[TMP95]], align 8
7075 // CHECK1-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
7076 // CHECK1-NEXT:    store ptr [[E]], ptr [[TMP96]], align 8
7077 // CHECK1-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 1
7078 // CHECK1-NEXT:    store ptr null, ptr [[TMP97]], align 8
7079 // CHECK1-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
7080 // CHECK1-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
7081 // CHECK1-NEXT:    [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0
7082 // CHECK1-NEXT:    store i32 3, ptr [[TMP100]], align 4
7083 // CHECK1-NEXT:    [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1
7084 // CHECK1-NEXT:    store i32 2, ptr [[TMP101]], align 4
7085 // CHECK1-NEXT:    [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2
7086 // CHECK1-NEXT:    store ptr [[TMP98]], ptr [[TMP102]], align 8
7087 // CHECK1-NEXT:    [[TMP103:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 3
7088 // CHECK1-NEXT:    store ptr [[TMP99]], ptr [[TMP103]], align 8
7089 // CHECK1-NEXT:    [[TMP104:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 4
7090 // CHECK1-NEXT:    store ptr @.offload_sizes.3, ptr [[TMP104]], align 8
7091 // CHECK1-NEXT:    [[TMP105:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 5
7092 // CHECK1-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP105]], align 8
7093 // CHECK1-NEXT:    [[TMP106:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 6
7094 // CHECK1-NEXT:    store ptr null, ptr [[TMP106]], align 8
7095 // CHECK1-NEXT:    [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 7
7096 // CHECK1-NEXT:    store ptr null, ptr [[TMP107]], align 8
7097 // CHECK1-NEXT:    [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 8
7098 // CHECK1-NEXT:    store i64 0, ptr [[TMP108]], align 8
7099 // CHECK1-NEXT:    [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 9
7100 // CHECK1-NEXT:    store i64 0, ptr [[TMP109]], align 8
7101 // CHECK1-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 10
7102 // CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP110]], align 4
7103 // CHECK1-NEXT:    [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 11
7104 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP111]], align 4
7105 // CHECK1-NEXT:    [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 12
7106 // CHECK1-NEXT:    store i32 0, ptr [[TMP112]], align 4
7107 // CHECK1-NEXT:    [[TMP113:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, ptr [[KERNEL_ARGS11]])
7108 // CHECK1-NEXT:    [[TMP114:%.*]] = icmp ne i32 [[TMP113]], 0
7109 // CHECK1-NEXT:    br i1 [[TMP114]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
7110 // CHECK1:       omp_offload.failed12:
7111 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(ptr [[TMP91]], ptr [[E]]) #[[ATTR3]]
7112 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT13]]
7113 // CHECK1:       omp_offload.cont13:
7114 // CHECK1-NEXT:    [[TMP115:%.*]] = load i32, ptr [[A]], align 4
7115 // CHECK1-NEXT:    [[TMP116:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
7116 // CHECK1-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP116]])
7117 // CHECK1-NEXT:    ret i32 [[TMP115]]
7118 //
7119 //
7120 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
7121 // CHECK1-SAME: (i64 noundef [[A:%.*]], ptr noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
7122 // CHECK1-NEXT:  entry:
7123 // CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
7124 // CHECK1-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
7125 // CHECK1-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
7126 // CHECK1-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
7127 // CHECK1-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
7128 // CHECK1-NEXT:    store i64 [[GA]], ptr [[GA_ADDR]], align 8
7129 // CHECK1-NEXT:    ret void
7130 //
7131 //
7132 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
7133 // CHECK1-SAME: (i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
7134 // CHECK1-NEXT:  entry:
7135 // CHECK1-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
7136 // CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
7137 // CHECK1-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
7138 // CHECK1-NEXT:    [[BN_ADDR:%.*]] = alloca ptr, align 8
7139 // CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
7140 // CHECK1-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
7141 // CHECK1-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
7142 // CHECK1-NEXT:    [[CN_ADDR:%.*]] = alloca ptr, align 8
7143 // CHECK1-NEXT:    [[D_ADDR:%.*]] = alloca ptr, align 8
7144 // CHECK1-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
7145 // CHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
7146 // CHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
7147 // CHECK1-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
7148 // CHECK1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
7149 // CHECK1-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
7150 // CHECK1-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
7151 // CHECK1-NEXT:    store i64 [[AA]], ptr [[AA_ADDR]], align 8
7152 // CHECK1-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
7153 // CHECK1-NEXT:    store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
7154 // CHECK1-NEXT:    store ptr [[BN]], ptr [[BN_ADDR]], align 8
7155 // CHECK1-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
7156 // CHECK1-NEXT:    store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
7157 // CHECK1-NEXT:    store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8
7158 // CHECK1-NEXT:    store ptr [[CN]], ptr [[CN_ADDR]], align 8
7159 // CHECK1-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 8
7160 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
7161 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
7162 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8
7163 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
7164 // CHECK1-NEXT:    [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
7165 // CHECK1-NEXT:    [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8
7166 // CHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8
7167 // CHECK1-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
7168 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i64 40, i1 false)
7169 // CHECK1-NEXT:    [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
7170 // CHECK1-NEXT:    store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8
7171 // CHECK1-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
7172 // CHECK1-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
7173 // CHECK1-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP1]], 4
7174 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i64 [[TMP9]], i1 false)
7175 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i64 400, i1 false)
7176 // CHECK1-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
7177 // CHECK1-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP10]], align 8
7178 // CHECK1-NEXT:    store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
7179 // CHECK1-NEXT:    store i64 [[TMP5]], ptr [[__VLA_EXPR2]], align 8
7180 // CHECK1-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
7181 // CHECK1-NEXT:    [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
7182 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i64 [[TMP12]], i1 false)
7183 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[D9]], ptr align 8 [[TMP7]], i64 16, i1 false)
7184 // CHECK1-NEXT:    [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
7185 // CHECK1-NEXT:    [[CONV:%.*]] = sext i16 [[TMP13]] to i32
7186 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
7187 // CHECK1-NEXT:    [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
7188 // CHECK1-NEXT:    store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
7189 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i64 0, i64 2
7190 // CHECK1-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
7191 // CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i64 3
7192 // CHECK1-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
7193 // CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i64 0, i64 1
7194 // CHECK1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i64 0, i64 2
7195 // CHECK1-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
7196 // CHECK1-NEXT:    [[TMP14:%.*]] = mul nsw i64 1, [[TMP5]]
7197 // CHECK1-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i64 [[TMP14]]
7198 // CHECK1-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3
7199 // CHECK1-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
7200 // CHECK1-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
7201 // CHECK1-NEXT:    store i64 1, ptr [[X]], align 8
7202 // CHECK1-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
7203 // CHECK1-NEXT:    store i8 1, ptr [[Y]], align 8
7204 // CHECK1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
7205 // CHECK1-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP15]])
7206 // CHECK1-NEXT:    ret void
7207 //
7208 //
7209 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
7210 // CHECK1-SAME: (ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
7211 // CHECK1-NEXT:  entry:
7212 // CHECK1-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
7213 // CHECK1-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
7214 // CHECK1-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
7215 // CHECK1-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
7216 // CHECK1-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
7217 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
7218 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[E1]], ptr align 4 [[TMP0]], i64 8, i1 false)
7219 // CHECK1-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E1]], i32 0, i32 0
7220 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
7221 // CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
7222 // CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
7223 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 0
7224 // CHECK1-NEXT:    store double [[CONV]], ptr [[ARRAYIDX]], align 8
7225 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
7226 // CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0
7227 // CHECK1-NEXT:    [[TMP4:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
7228 // CHECK1-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
7229 // CHECK1-NEXT:    store double [[INC]], ptr [[ARRAYIDX2]], align 8
7230 // CHECK1-NEXT:    ret void
7231 //
7232 //
7233 // CHECK1-LABEL: define {{[^@]+}}@_Z3bariPd
7234 // CHECK1-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
7235 // CHECK1-NEXT:  entry:
7236 // CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
7237 // CHECK1-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
7238 // CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
7239 // CHECK1-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
7240 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
7241 // CHECK1-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
7242 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
7243 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
7244 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
7245 // CHECK1-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], ptr noundef [[TMP1]])
7246 // CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
7247 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
7248 // CHECK1-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
7249 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
7250 // CHECK1-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
7251 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
7252 // CHECK1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
7253 // CHECK1-NEXT:    store i32 [[ADD2]], ptr [[A]], align 4
7254 // CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
7255 // CHECK1-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
7256 // CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
7257 // CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
7258 // CHECK1-NEXT:    store i32 [[ADD4]], ptr [[A]], align 4
7259 // CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
7260 // CHECK1-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
7261 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
7262 // CHECK1-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
7263 // CHECK1-NEXT:    store i32 [[ADD6]], ptr [[A]], align 4
7264 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
7265 // CHECK1-NEXT:    ret i32 [[TMP9]]
7266 //
7267 //
7268 // CHECK1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
7269 // CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
7270 // CHECK1-NEXT:  entry:
7271 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
7272 // CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
7273 // CHECK1-NEXT:    [[B:%.*]] = alloca i32, align 4
7274 // CHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
7275 // CHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
7276 // CHECK1-NEXT:    [[B_CASTED:%.*]] = alloca i64, align 8
7277 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 8
7278 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
7279 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
7280 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
7281 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
7282 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
7283 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
7284 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
7285 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
7286 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
7287 // CHECK1-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
7288 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
7289 // CHECK1-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
7290 // CHECK1-NEXT:    [[TMP3:%.*]] = call ptr @llvm.stacksave.p0()
7291 // CHECK1-NEXT:    store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8
7292 // CHECK1-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
7293 // CHECK1-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
7294 // CHECK1-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
7295 // CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[B]], align 4
7296 // CHECK1-NEXT:    store i32 [[TMP5]], ptr [[B_CASTED]], align 4
7297 // CHECK1-NEXT:    [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8
7298 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
7299 // CHECK1-NEXT:    [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
7300 // CHECK1-NEXT:    [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
7301 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.5, i64 40, i1 false)
7302 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
7303 // CHECK1-NEXT:    store ptr [[THIS1]], ptr [[TMP9]], align 8
7304 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
7305 // CHECK1-NEXT:    store ptr [[A]], ptr [[TMP10]], align 8
7306 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
7307 // CHECK1-NEXT:    store ptr null, ptr [[TMP11]], align 8
7308 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
7309 // CHECK1-NEXT:    store i64 [[TMP6]], ptr [[TMP12]], align 8
7310 // CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
7311 // CHECK1-NEXT:    store i64 [[TMP6]], ptr [[TMP13]], align 8
7312 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
7313 // CHECK1-NEXT:    store ptr null, ptr [[TMP14]], align 8
7314 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
7315 // CHECK1-NEXT:    store i64 2, ptr [[TMP15]], align 8
7316 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
7317 // CHECK1-NEXT:    store i64 2, ptr [[TMP16]], align 8
7318 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
7319 // CHECK1-NEXT:    store ptr null, ptr [[TMP17]], align 8
7320 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
7321 // CHECK1-NEXT:    store i64 [[TMP2]], ptr [[TMP18]], align 8
7322 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
7323 // CHECK1-NEXT:    store i64 [[TMP2]], ptr [[TMP19]], align 8
7324 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
7325 // CHECK1-NEXT:    store ptr null, ptr [[TMP20]], align 8
7326 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
7327 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP21]], align 8
7328 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
7329 // CHECK1-NEXT:    store ptr [[VLA]], ptr [[TMP22]], align 8
7330 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
7331 // CHECK1-NEXT:    store i64 [[TMP8]], ptr [[TMP23]], align 8
7332 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
7333 // CHECK1-NEXT:    store ptr null, ptr [[TMP24]], align 8
7334 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
7335 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
7336 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
7337 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
7338 // CHECK1-NEXT:    store i32 3, ptr [[TMP28]], align 4
7339 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
7340 // CHECK1-NEXT:    store i32 5, ptr [[TMP29]], align 4
7341 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
7342 // CHECK1-NEXT:    store ptr [[TMP25]], ptr [[TMP30]], align 8
7343 // CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
7344 // CHECK1-NEXT:    store ptr [[TMP26]], ptr [[TMP31]], align 8
7345 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
7346 // CHECK1-NEXT:    store ptr [[TMP27]], ptr [[TMP32]], align 8
7347 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
7348 // CHECK1-NEXT:    store ptr @.offload_maptypes.6, ptr [[TMP33]], align 8
7349 // CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
7350 // CHECK1-NEXT:    store ptr null, ptr [[TMP34]], align 8
7351 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
7352 // CHECK1-NEXT:    store ptr null, ptr [[TMP35]], align 8
7353 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
7354 // CHECK1-NEXT:    store i64 0, ptr [[TMP36]], align 8
7355 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
7356 // CHECK1-NEXT:    store i64 0, ptr [[TMP37]], align 8
7357 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
7358 // CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP38]], align 4
7359 // CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
7360 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP39]], align 4
7361 // CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
7362 // CHECK1-NEXT:    store i32 0, ptr [[TMP40]], align 4
7363 // CHECK1-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, ptr [[KERNEL_ARGS]])
7364 // CHECK1-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
7365 // CHECK1-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
7366 // CHECK1:       omp_offload.failed:
7367 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(ptr [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], ptr [[VLA]]) #[[ATTR3]]
7368 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
7369 // CHECK1:       omp_offload.cont:
7370 // CHECK1-NEXT:    [[TMP43:%.*]] = mul nsw i64 1, [[TMP2]]
7371 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP43]]
7372 // CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
7373 // CHECK1-NEXT:    [[TMP44:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
7374 // CHECK1-NEXT:    [[CONV:%.*]] = sext i16 [[TMP44]] to i32
7375 // CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[B]], align 4
7376 // CHECK1-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP45]]
7377 // CHECK1-NEXT:    [[TMP46:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
7378 // CHECK1-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP46]])
7379 // CHECK1-NEXT:    ret i32 [[ADD3]]
7380 //
7381 //
7382 // CHECK1-LABEL: define {{[^@]+}}@_ZL7fstatici
7383 // CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
7384 // CHECK1-NEXT:  entry:
7385 // CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
7386 // CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
7387 // CHECK1-NEXT:    [[AAA:%.*]] = alloca i8, align 1
7388 // CHECK1-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
7389 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
7390 // CHECK1-NEXT:    [[AAA_CASTED:%.*]] = alloca i64, align 8
7391 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
7392 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
7393 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
7394 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
7395 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
7396 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
7397 // CHECK1-NEXT:    store i8 0, ptr [[AAA]], align 1
7398 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
7399 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[A_CASTED]], align 4
7400 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
7401 // CHECK1-NEXT:    [[TMP2:%.*]] = load i8, ptr [[AAA]], align 1
7402 // CHECK1-NEXT:    store i8 [[TMP2]], ptr [[AAA_CASTED]], align 1
7403 // CHECK1-NEXT:    [[TMP3:%.*]] = load i64, ptr [[AAA_CASTED]], align 8
7404 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
7405 // CHECK1-NEXT:    store i64 [[TMP1]], ptr [[TMP4]], align 8
7406 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
7407 // CHECK1-NEXT:    store i64 [[TMP1]], ptr [[TMP5]], align 8
7408 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
7409 // CHECK1-NEXT:    store ptr null, ptr [[TMP6]], align 8
7410 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
7411 // CHECK1-NEXT:    store i64 [[TMP3]], ptr [[TMP7]], align 8
7412 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
7413 // CHECK1-NEXT:    store i64 [[TMP3]], ptr [[TMP8]], align 8
7414 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
7415 // CHECK1-NEXT:    store ptr null, ptr [[TMP9]], align 8
7416 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
7417 // CHECK1-NEXT:    store ptr [[B]], ptr [[TMP10]], align 8
7418 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
7419 // CHECK1-NEXT:    store ptr [[B]], ptr [[TMP11]], align 8
7420 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
7421 // CHECK1-NEXT:    store ptr null, ptr [[TMP12]], align 8
7422 // CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
7423 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
7424 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
7425 // CHECK1-NEXT:    store i32 3, ptr [[TMP15]], align 4
7426 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
7427 // CHECK1-NEXT:    store i32 3, ptr [[TMP16]], align 4
7428 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
7429 // CHECK1-NEXT:    store ptr [[TMP13]], ptr [[TMP17]], align 8
7430 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
7431 // CHECK1-NEXT:    store ptr [[TMP14]], ptr [[TMP18]], align 8
7432 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
7433 // CHECK1-NEXT:    store ptr @.offload_sizes.7, ptr [[TMP19]], align 8
7434 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
7435 // CHECK1-NEXT:    store ptr @.offload_maptypes.8, ptr [[TMP20]], align 8
7436 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
7437 // CHECK1-NEXT:    store ptr null, ptr [[TMP21]], align 8
7438 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
7439 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
7440 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
7441 // CHECK1-NEXT:    store i64 0, ptr [[TMP23]], align 8
7442 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
7443 // CHECK1-NEXT:    store i64 0, ptr [[TMP24]], align 8
7444 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
7445 // CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP25]], align 4
7446 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
7447 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4
7448 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
7449 // CHECK1-NEXT:    store i32 0, ptr [[TMP27]], align 4
7450 // CHECK1-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, ptr [[KERNEL_ARGS]])
7451 // CHECK1-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
7452 // CHECK1-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
7453 // CHECK1:       omp_offload.failed:
7454 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], ptr [[B]]) #[[ATTR3]]
7455 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
7456 // CHECK1:       omp_offload.cont:
7457 // CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[A]], align 4
7458 // CHECK1-NEXT:    ret i32 [[TMP30]]
7459 //
7460 //
7461 // CHECK1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
7462 // CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
7463 // CHECK1-NEXT:  entry:
7464 // CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
7465 // CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
7466 // CHECK1-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
7467 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
7468 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
7469 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
7470 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
7471 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
7472 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
7473 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
7474 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
7475 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[A_CASTED]], align 4
7476 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
7477 // CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
7478 // CHECK1-NEXT:    store i64 [[TMP1]], ptr [[TMP2]], align 8
7479 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
7480 // CHECK1-NEXT:    store i64 [[TMP1]], ptr [[TMP3]], align 8
7481 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
7482 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
7483 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
7484 // CHECK1-NEXT:    store ptr [[B]], ptr [[TMP5]], align 8
7485 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
7486 // CHECK1-NEXT:    store ptr [[B]], ptr [[TMP6]], align 8
7487 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
7488 // CHECK1-NEXT:    store ptr null, ptr [[TMP7]], align 8
7489 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
7490 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
7491 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
7492 // CHECK1-NEXT:    store i32 3, ptr [[TMP10]], align 4
7493 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
7494 // CHECK1-NEXT:    store i32 2, ptr [[TMP11]], align 4
7495 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
7496 // CHECK1-NEXT:    store ptr [[TMP8]], ptr [[TMP12]], align 8
7497 // CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
7498 // CHECK1-NEXT:    store ptr [[TMP9]], ptr [[TMP13]], align 8
7499 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
7500 // CHECK1-NEXT:    store ptr @.offload_sizes.9, ptr [[TMP14]], align 8
7501 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
7502 // CHECK1-NEXT:    store ptr @.offload_maptypes.10, ptr [[TMP15]], align 8
7503 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
7504 // CHECK1-NEXT:    store ptr null, ptr [[TMP16]], align 8
7505 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
7506 // CHECK1-NEXT:    store ptr null, ptr [[TMP17]], align 8
7507 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
7508 // CHECK1-NEXT:    store i64 0, ptr [[TMP18]], align 8
7509 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
7510 // CHECK1-NEXT:    store i64 0, ptr [[TMP19]], align 8
7511 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
7512 // CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP20]], align 4
7513 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
7514 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
7515 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
7516 // CHECK1-NEXT:    store i32 0, ptr [[TMP22]], align 4
7517 // CHECK1-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, ptr [[KERNEL_ARGS]])
7518 // CHECK1-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
7519 // CHECK1-NEXT:    br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
7520 // CHECK1:       omp_offload.failed:
7521 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], ptr [[B]]) #[[ATTR3]]
7522 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
7523 // CHECK1:       omp_offload.cont:
7524 // CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[A]], align 4
7525 // CHECK1-NEXT:    ret i32 [[TMP25]]
7526 //
7527 //
7528 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
7529 // CHECK1-SAME: (ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
7530 // CHECK1-NEXT:  entry:
7531 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
7532 // CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
7533 // CHECK1-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
7534 // CHECK1-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
7535 // CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
7536 // CHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
7537 // CHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
7538 // CHECK1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
7539 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
7540 // CHECK1-NEXT:    store i64 [[B]], ptr [[B_ADDR]], align 8
7541 // CHECK1-NEXT:    store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
7542 // CHECK1-NEXT:    store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
7543 // CHECK1-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
7544 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
7545 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
7546 // CHECK1-NEXT:    [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
7547 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
7548 // CHECK1-NEXT:    [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
7549 // CHECK1-NEXT:    store ptr [[TMP4]], ptr [[SAVED_STACK]], align 8
7550 // CHECK1-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
7551 // CHECK1-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
7552 // CHECK1-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
7553 // CHECK1-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR1]], align 8
7554 // CHECK1-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
7555 // CHECK1-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
7556 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i64 [[TMP7]], i1 false)
7557 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
7558 // CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
7559 // CHECK1-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
7560 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
7561 // CHECK1-NEXT:    store double [[ADD]], ptr [[A]], align 8
7562 // CHECK1-NEXT:    [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
7563 // CHECK1-NEXT:    [[TMP9:%.*]] = load double, ptr [[A4]], align 8
7564 // CHECK1-NEXT:    [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
7565 // CHECK1-NEXT:    store double [[INC]], ptr [[A4]], align 8
7566 // CHECK1-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
7567 // CHECK1-NEXT:    [[TMP10:%.*]] = mul nsw i64 1, [[TMP2]]
7568 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i64 [[TMP10]]
7569 // CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
7570 // CHECK1-NEXT:    store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
7571 // CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
7572 // CHECK1-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP11]])
7573 // CHECK1-NEXT:    ret void
7574 //
7575 //
7576 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
7577 // CHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
7578 // CHECK1-NEXT:  entry:
7579 // CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
7580 // CHECK1-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
7581 // CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
7582 // CHECK1-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
7583 // CHECK1-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
7584 // CHECK1-NEXT:    store i64 [[AAA]], ptr [[AAA_ADDR]], align 8
7585 // CHECK1-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
7586 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
7587 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
7588 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
7589 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
7590 // CHECK1-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
7591 // CHECK1-NEXT:    [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
7592 // CHECK1-NEXT:    [[CONV:%.*]] = sext i8 [[TMP2]] to i32
7593 // CHECK1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
7594 // CHECK1-NEXT:    [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
7595 // CHECK1-NEXT:    store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
7596 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
7597 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
7598 // CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
7599 // CHECK1-NEXT:    store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
7600 // CHECK1-NEXT:    ret void
7601 //
7602 //
7603 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
7604 // CHECK1-SAME: (i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
7605 // CHECK1-NEXT:  entry:
7606 // CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
7607 // CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
7608 // CHECK1-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
7609 // CHECK1-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
7610 // CHECK1-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
7611 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
7612 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
7613 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
7614 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
7615 // CHECK1-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
7616 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
7617 // CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
7618 // CHECK1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
7619 // CHECK1-NEXT:    store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
7620 // CHECK1-NEXT:    ret void
7621 //
7622 //
7623 // CHECK2-LABEL: define {{[^@]+}}@_Z3fooiPd
7624 // CHECK2-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
7625 // CHECK2-NEXT:  entry:
7626 // CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
7627 // CHECK2-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
7628 // CHECK2-NEXT:    [[A:%.*]] = alloca i32, align 4
7629 // CHECK2-NEXT:    [[AA:%.*]] = alloca i16, align 2
7630 // CHECK2-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
7631 // CHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
7632 // CHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
7633 // CHECK2-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
7634 // CHECK2-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
7635 // CHECK2-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
7636 // CHECK2-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
7637 // CHECK2-NEXT:    [[P:%.*]] = alloca ptr, align 64
7638 // CHECK2-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
7639 // CHECK2-NEXT:    [[GA_CASTED:%.*]] = alloca i32, align 4
7640 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
7641 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
7642 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
7643 // CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
7644 // CHECK2-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
7645 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x ptr], align 4
7646 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x ptr], align 4
7647 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x ptr], align 4
7648 // CHECK2-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
7649 // CHECK2-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
7650 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
7651 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
7652 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
7653 // CHECK2-NEXT:    [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
7654 // CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
7655 // CHECK2-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
7656 // CHECK2-NEXT:    store i32 0, ptr [[A]], align 4
7657 // CHECK2-NEXT:    store i16 0, ptr [[AA]], align 2
7658 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
7659 // CHECK2-NEXT:    [[TMP1:%.*]] = call ptr @llvm.stacksave.p0()
7660 // CHECK2-NEXT:    store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4
7661 // CHECK2-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
7662 // CHECK2-NEXT:    store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
7663 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
7664 // CHECK2-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
7665 // CHECK2-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
7666 // CHECK2-NEXT:    store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
7667 // CHECK2-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
7668 // CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
7669 // CHECK2-NEXT:    store i32 [[TMP4]], ptr [[X]], align 4
7670 // CHECK2-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
7671 // CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
7672 // CHECK2-NEXT:    store i32 [[TMP5]], ptr [[Y]], align 4
7673 // CHECK2-NEXT:    store ptr [[A]], ptr [[P]], align 64
7674 // CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
7675 // CHECK2-NEXT:    store i32 [[TMP6]], ptr [[A_CASTED]], align 4
7676 // CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4
7677 // CHECK2-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[P]], align 64
7678 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr @ga, align 4
7679 // CHECK2-NEXT:    store i32 [[TMP9]], ptr [[GA_CASTED]], align 4
7680 // CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[GA_CASTED]], align 4
7681 // CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
7682 // CHECK2-NEXT:    store i32 [[TMP7]], ptr [[TMP11]], align 4
7683 // CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
7684 // CHECK2-NEXT:    store i32 [[TMP7]], ptr [[TMP12]], align 4
7685 // CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
7686 // CHECK2-NEXT:    store ptr null, ptr [[TMP13]], align 4
7687 // CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
7688 // CHECK2-NEXT:    store ptr [[TMP8]], ptr [[TMP14]], align 4
7689 // CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
7690 // CHECK2-NEXT:    store ptr [[TMP8]], ptr [[TMP15]], align 4
7691 // CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
7692 // CHECK2-NEXT:    store ptr null, ptr [[TMP16]], align 4
7693 // CHECK2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
7694 // CHECK2-NEXT:    store i32 [[TMP10]], ptr [[TMP17]], align 4
7695 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
7696 // CHECK2-NEXT:    store i32 [[TMP10]], ptr [[TMP18]], align 4
7697 // CHECK2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
7698 // CHECK2-NEXT:    store ptr null, ptr [[TMP19]], align 4
7699 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
7700 // CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
7701 // CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
7702 // CHECK2-NEXT:    store i32 3, ptr [[TMP22]], align 4
7703 // CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
7704 // CHECK2-NEXT:    store i32 3, ptr [[TMP23]], align 4
7705 // CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
7706 // CHECK2-NEXT:    store ptr [[TMP20]], ptr [[TMP24]], align 4
7707 // CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
7708 // CHECK2-NEXT:    store ptr [[TMP21]], ptr [[TMP25]], align 4
7709 // CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
7710 // CHECK2-NEXT:    store ptr @.offload_sizes, ptr [[TMP26]], align 4
7711 // CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
7712 // CHECK2-NEXT:    store ptr @.offload_maptypes, ptr [[TMP27]], align 4
7713 // CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
7714 // CHECK2-NEXT:    store ptr null, ptr [[TMP28]], align 4
7715 // CHECK2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
7716 // CHECK2-NEXT:    store ptr null, ptr [[TMP29]], align 4
7717 // CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
7718 // CHECK2-NEXT:    store i64 0, ptr [[TMP30]], align 8
7719 // CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
7720 // CHECK2-NEXT:    store i64 0, ptr [[TMP31]], align 8
7721 // CHECK2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
7722 // CHECK2-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP32]], align 4
7723 // CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
7724 // CHECK2-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP33]], align 4
7725 // CHECK2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
7726 // CHECK2-NEXT:    store i32 0, ptr [[TMP34]], align 4
7727 // CHECK2-NEXT:    [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, ptr [[KERNEL_ARGS]])
7728 // CHECK2-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
7729 // CHECK2-NEXT:    br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
7730 // CHECK2:       omp_offload.failed:
7731 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], ptr [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
7732 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
7733 // CHECK2:       omp_offload.cont:
7734 // CHECK2-NEXT:    [[TMP37:%.*]] = load i16, ptr [[AA]], align 2
7735 // CHECK2-NEXT:    store i16 [[TMP37]], ptr [[AA_CASTED]], align 2
7736 // CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[AA_CASTED]], align 4
7737 // CHECK2-NEXT:    [[TMP39:%.*]] = mul nuw i32 [[TMP0]], 4
7738 // CHECK2-NEXT:    [[TMP40:%.*]] = sext i32 [[TMP39]] to i64
7739 // CHECK2-NEXT:    [[TMP41:%.*]] = mul nuw i32 5, [[TMP2]]
7740 // CHECK2-NEXT:    [[TMP42:%.*]] = mul nuw i32 [[TMP41]], 8
7741 // CHECK2-NEXT:    [[TMP43:%.*]] = sext i32 [[TMP42]] to i64
7742 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.1, i32 72, i1 false)
7743 // CHECK2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
7744 // CHECK2-NEXT:    store i32 [[TMP38]], ptr [[TMP44]], align 4
7745 // CHECK2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
7746 // CHECK2-NEXT:    store i32 [[TMP38]], ptr [[TMP45]], align 4
7747 // CHECK2-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
7748 // CHECK2-NEXT:    store ptr null, ptr [[TMP46]], align 4
7749 // CHECK2-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
7750 // CHECK2-NEXT:    store ptr [[B]], ptr [[TMP47]], align 4
7751 // CHECK2-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
7752 // CHECK2-NEXT:    store ptr [[B]], ptr [[TMP48]], align 4
7753 // CHECK2-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
7754 // CHECK2-NEXT:    store ptr null, ptr [[TMP49]], align 4
7755 // CHECK2-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
7756 // CHECK2-NEXT:    store i32 [[TMP0]], ptr [[TMP50]], align 4
7757 // CHECK2-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
7758 // CHECK2-NEXT:    store i32 [[TMP0]], ptr [[TMP51]], align 4
7759 // CHECK2-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
7760 // CHECK2-NEXT:    store ptr null, ptr [[TMP52]], align 4
7761 // CHECK2-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
7762 // CHECK2-NEXT:    store ptr [[VLA]], ptr [[TMP53]], align 4
7763 // CHECK2-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
7764 // CHECK2-NEXT:    store ptr [[VLA]], ptr [[TMP54]], align 4
7765 // CHECK2-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3
7766 // CHECK2-NEXT:    store i64 [[TMP40]], ptr [[TMP55]], align 4
7767 // CHECK2-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
7768 // CHECK2-NEXT:    store ptr null, ptr [[TMP56]], align 4
7769 // CHECK2-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
7770 // CHECK2-NEXT:    store ptr [[C]], ptr [[TMP57]], align 4
7771 // CHECK2-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
7772 // CHECK2-NEXT:    store ptr [[C]], ptr [[TMP58]], align 4
7773 // CHECK2-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
7774 // CHECK2-NEXT:    store ptr null, ptr [[TMP59]], align 4
7775 // CHECK2-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
7776 // CHECK2-NEXT:    store i32 5, ptr [[TMP60]], align 4
7777 // CHECK2-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
7778 // CHECK2-NEXT:    store i32 5, ptr [[TMP61]], align 4
7779 // CHECK2-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
7780 // CHECK2-NEXT:    store ptr null, ptr [[TMP62]], align 4
7781 // CHECK2-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
7782 // CHECK2-NEXT:    store i32 [[TMP2]], ptr [[TMP63]], align 4
7783 // CHECK2-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
7784 // CHECK2-NEXT:    store i32 [[TMP2]], ptr [[TMP64]], align 4
7785 // CHECK2-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
7786 // CHECK2-NEXT:    store ptr null, ptr [[TMP65]], align 4
7787 // CHECK2-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
7788 // CHECK2-NEXT:    store ptr [[VLA1]], ptr [[TMP66]], align 4
7789 // CHECK2-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
7790 // CHECK2-NEXT:    store ptr [[VLA1]], ptr [[TMP67]], align 4
7791 // CHECK2-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7
7792 // CHECK2-NEXT:    store i64 [[TMP43]], ptr [[TMP68]], align 4
7793 // CHECK2-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
7794 // CHECK2-NEXT:    store ptr null, ptr [[TMP69]], align 4
7795 // CHECK2-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
7796 // CHECK2-NEXT:    store ptr [[D]], ptr [[TMP70]], align 4
7797 // CHECK2-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
7798 // CHECK2-NEXT:    store ptr [[D]], ptr [[TMP71]], align 4
7799 // CHECK2-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
7800 // CHECK2-NEXT:    store ptr null, ptr [[TMP72]], align 4
7801 // CHECK2-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
7802 // CHECK2-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
7803 // CHECK2-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
7804 // CHECK2-NEXT:    [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
7805 // CHECK2-NEXT:    store i32 3, ptr [[TMP76]], align 4
7806 // CHECK2-NEXT:    [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
7807 // CHECK2-NEXT:    store i32 9, ptr [[TMP77]], align 4
7808 // CHECK2-NEXT:    [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
7809 // CHECK2-NEXT:    store ptr [[TMP73]], ptr [[TMP78]], align 4
7810 // CHECK2-NEXT:    [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
7811 // CHECK2-NEXT:    store ptr [[TMP74]], ptr [[TMP79]], align 4
7812 // CHECK2-NEXT:    [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
7813 // CHECK2-NEXT:    store ptr [[TMP75]], ptr [[TMP80]], align 4
7814 // CHECK2-NEXT:    [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
7815 // CHECK2-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP81]], align 4
7816 // CHECK2-NEXT:    [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
7817 // CHECK2-NEXT:    store ptr null, ptr [[TMP82]], align 4
7818 // CHECK2-NEXT:    [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
7819 // CHECK2-NEXT:    store ptr null, ptr [[TMP83]], align 4
7820 // CHECK2-NEXT:    [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
7821 // CHECK2-NEXT:    store i64 0, ptr [[TMP84]], align 8
7822 // CHECK2-NEXT:    [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
7823 // CHECK2-NEXT:    store i64 0, ptr [[TMP85]], align 8
7824 // CHECK2-NEXT:    [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
7825 // CHECK2-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP86]], align 4
7826 // CHECK2-NEXT:    [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
7827 // CHECK2-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP87]], align 4
7828 // CHECK2-NEXT:    [[TMP88:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
7829 // CHECK2-NEXT:    store i32 0, ptr [[TMP88]], align 4
7830 // CHECK2-NEXT:    [[TMP89:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, ptr [[KERNEL_ARGS5]])
7831 // CHECK2-NEXT:    [[TMP90:%.*]] = icmp ne i32 [[TMP89]], 0
7832 // CHECK2-NEXT:    br i1 [[TMP90]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
7833 // CHECK2:       omp_offload.failed6:
7834 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP38]], ptr [[B]], i32 [[TMP0]], ptr [[VLA]], ptr [[C]], i32 5, i32 [[TMP2]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]]
7835 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
7836 // CHECK2:       omp_offload.cont7:
7837 // CHECK2-NEXT:    [[TMP91:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
7838 // CHECK2-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
7839 // CHECK2-NEXT:    store ptr [[TMP91]], ptr [[TMP92]], align 4
7840 // CHECK2-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
7841 // CHECK2-NEXT:    store ptr [[TMP91]], ptr [[TMP93]], align 4
7842 // CHECK2-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
7843 // CHECK2-NEXT:    store ptr null, ptr [[TMP94]], align 4
7844 // CHECK2-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
7845 // CHECK2-NEXT:    store ptr [[E]], ptr [[TMP95]], align 4
7846 // CHECK2-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
7847 // CHECK2-NEXT:    store ptr [[E]], ptr [[TMP96]], align 4
7848 // CHECK2-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
7849 // CHECK2-NEXT:    store ptr null, ptr [[TMP97]], align 4
7850 // CHECK2-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
7851 // CHECK2-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
7852 // CHECK2-NEXT:    [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0
7853 // CHECK2-NEXT:    store i32 3, ptr [[TMP100]], align 4
7854 // CHECK2-NEXT:    [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1
7855 // CHECK2-NEXT:    store i32 2, ptr [[TMP101]], align 4
7856 // CHECK2-NEXT:    [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2
7857 // CHECK2-NEXT:    store ptr [[TMP98]], ptr [[TMP102]], align 4
7858 // CHECK2-NEXT:    [[TMP103:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 3
7859 // CHECK2-NEXT:    store ptr [[TMP99]], ptr [[TMP103]], align 4
7860 // CHECK2-NEXT:    [[TMP104:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 4
7861 // CHECK2-NEXT:    store ptr @.offload_sizes.3, ptr [[TMP104]], align 4
7862 // CHECK2-NEXT:    [[TMP105:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 5
7863 // CHECK2-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP105]], align 4
7864 // CHECK2-NEXT:    [[TMP106:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 6
7865 // CHECK2-NEXT:    store ptr null, ptr [[TMP106]], align 4
7866 // CHECK2-NEXT:    [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 7
7867 // CHECK2-NEXT:    store ptr null, ptr [[TMP107]], align 4
7868 // CHECK2-NEXT:    [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 8
7869 // CHECK2-NEXT:    store i64 0, ptr [[TMP108]], align 8
7870 // CHECK2-NEXT:    [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 9
7871 // CHECK2-NEXT:    store i64 0, ptr [[TMP109]], align 8
7872 // CHECK2-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 10
7873 // CHECK2-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP110]], align 4
7874 // CHECK2-NEXT:    [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 11
7875 // CHECK2-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP111]], align 4
7876 // CHECK2-NEXT:    [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 12
7877 // CHECK2-NEXT:    store i32 0, ptr [[TMP112]], align 4
7878 // CHECK2-NEXT:    [[TMP113:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, ptr [[KERNEL_ARGS11]])
7879 // CHECK2-NEXT:    [[TMP114:%.*]] = icmp ne i32 [[TMP113]], 0
7880 // CHECK2-NEXT:    br i1 [[TMP114]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
7881 // CHECK2:       omp_offload.failed12:
7882 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(ptr [[TMP91]], ptr [[E]]) #[[ATTR3]]
7883 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT13]]
7884 // CHECK2:       omp_offload.cont13:
7885 // CHECK2-NEXT:    [[TMP115:%.*]] = load i32, ptr [[A]], align 4
7886 // CHECK2-NEXT:    [[TMP116:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
7887 // CHECK2-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP116]])
7888 // CHECK2-NEXT:    ret i32 [[TMP115]]
7889 //
7890 //
7891 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
7892 // CHECK2-SAME: (i32 noundef [[A:%.*]], ptr noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
7893 // CHECK2-NEXT:  entry:
7894 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
7895 // CHECK2-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 4
7896 // CHECK2-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
7897 // CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
7898 // CHECK2-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 4
7899 // CHECK2-NEXT:    store i32 [[GA]], ptr [[GA_ADDR]], align 4
7900 // CHECK2-NEXT:    ret void
7901 //
7902 //
7903 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
7904 // CHECK2-SAME: (i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
7905 // CHECK2-NEXT:  entry:
7906 // CHECK2-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
7907 // CHECK2-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
7908 // CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
7909 // CHECK2-NEXT:    [[BN_ADDR:%.*]] = alloca ptr, align 4
7910 // CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 4
7911 // CHECK2-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
7912 // CHECK2-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
7913 // CHECK2-NEXT:    [[CN_ADDR:%.*]] = alloca ptr, align 4
7914 // CHECK2-NEXT:    [[D_ADDR:%.*]] = alloca ptr, align 4
7915 // CHECK2-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
7916 // CHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
7917 // CHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
7918 // CHECK2-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
7919 // CHECK2-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
7920 // CHECK2-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
7921 // CHECK2-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
7922 // CHECK2-NEXT:    store i32 [[AA]], ptr [[AA_ADDR]], align 4
7923 // CHECK2-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
7924 // CHECK2-NEXT:    store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
7925 // CHECK2-NEXT:    store ptr [[BN]], ptr [[BN_ADDR]], align 4
7926 // CHECK2-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
7927 // CHECK2-NEXT:    store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
7928 // CHECK2-NEXT:    store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4
7929 // CHECK2-NEXT:    store ptr [[CN]], ptr [[CN_ADDR]], align 4
7930 // CHECK2-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 4
7931 // CHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
7932 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
7933 // CHECK2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4
7934 // CHECK2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
7935 // CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
7936 // CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4
7937 // CHECK2-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4
7938 // CHECK2-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4
7939 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i32 40, i1 false)
7940 // CHECK2-NEXT:    [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
7941 // CHECK2-NEXT:    store ptr [[TMP8]], ptr [[SAVED_STACK]], align 4
7942 // CHECK2-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
7943 // CHECK2-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
7944 // CHECK2-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP1]], 4
7945 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i32 [[TMP9]], i1 false)
7946 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i32 400, i1 false)
7947 // CHECK2-NEXT:    [[TMP10:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
7948 // CHECK2-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP10]], align 8
7949 // CHECK2-NEXT:    store i32 [[TMP4]], ptr [[__VLA_EXPR1]], align 4
7950 // CHECK2-NEXT:    store i32 [[TMP5]], ptr [[__VLA_EXPR2]], align 4
7951 // CHECK2-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
7952 // CHECK2-NEXT:    [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 8
7953 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i32 [[TMP12]], i1 false)
7954 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[D9]], ptr align 4 [[TMP7]], i32 12, i1 false)
7955 // CHECK2-NEXT:    [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
7956 // CHECK2-NEXT:    [[CONV:%.*]] = sext i16 [[TMP13]] to i32
7957 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
7958 // CHECK2-NEXT:    [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
7959 // CHECK2-NEXT:    store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
7960 // CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i32 0, i32 2
7961 // CHECK2-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
7962 // CHECK2-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i32 3
7963 // CHECK2-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
7964 // CHECK2-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i32 0, i32 1
7965 // CHECK2-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i32 0, i32 2
7966 // CHECK2-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
7967 // CHECK2-NEXT:    [[TMP14:%.*]] = mul nsw i32 1, [[TMP5]]
7968 // CHECK2-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i32 [[TMP14]]
7969 // CHECK2-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3
7970 // CHECK2-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
7971 // CHECK2-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
7972 // CHECK2-NEXT:    store i64 1, ptr [[X]], align 4
7973 // CHECK2-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
7974 // CHECK2-NEXT:    store i8 1, ptr [[Y]], align 4
7975 // CHECK2-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
7976 // CHECK2-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP15]])
7977 // CHECK2-NEXT:    ret void
7978 //
7979 //
7980 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
7981 // CHECK2-SAME: (ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
7982 // CHECK2-NEXT:  entry:
7983 // CHECK2-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
7984 // CHECK2-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 4
7985 // CHECK2-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
7986 // CHECK2-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
7987 // CHECK2-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 4
7988 // CHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
7989 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E1]], ptr align 4 [[TMP0]], i32 8, i1 false)
7990 // CHECK2-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E1]], i32 0, i32 0
7991 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
7992 // CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
7993 // CHECK2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
7994 // CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0
7995 // CHECK2-NEXT:    store double [[CONV]], ptr [[ARRAYIDX]], align 4
7996 // CHECK2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
7997 // CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0
7998 // CHECK2-NEXT:    [[TMP4:%.*]] = load double, ptr [[ARRAYIDX2]], align 4
7999 // CHECK2-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
8000 // CHECK2-NEXT:    store double [[INC]], ptr [[ARRAYIDX2]], align 4
8001 // CHECK2-NEXT:    ret void
8002 //
8003 //
8004 // CHECK2-LABEL: define {{[^@]+}}@_Z3bariPd
8005 // CHECK2-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
8006 // CHECK2-NEXT:  entry:
8007 // CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
8008 // CHECK2-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
8009 // CHECK2-NEXT:    [[A:%.*]] = alloca i32, align 4
8010 // CHECK2-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
8011 // CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
8012 // CHECK2-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
8013 // CHECK2-NEXT:    store i32 0, ptr [[A]], align 4
8014 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
8015 // CHECK2-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
8016 // CHECK2-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], ptr noundef [[TMP1]])
8017 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
8018 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
8019 // CHECK2-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
8020 // CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
8021 // CHECK2-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
8022 // CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
8023 // CHECK2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
8024 // CHECK2-NEXT:    store i32 [[ADD2]], ptr [[A]], align 4
8025 // CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
8026 // CHECK2-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
8027 // CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
8028 // CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
8029 // CHECK2-NEXT:    store i32 [[ADD4]], ptr [[A]], align 4
8030 // CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
8031 // CHECK2-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
8032 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
8033 // CHECK2-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
8034 // CHECK2-NEXT:    store i32 [[ADD6]], ptr [[A]], align 4
8035 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
8036 // CHECK2-NEXT:    ret i32 [[TMP9]]
8037 //
8038 //
8039 // CHECK2-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
8040 // CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
8041 // CHECK2-NEXT:  entry:
8042 // CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 4
8043 // CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
8044 // CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
8045 // CHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
8046 // CHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
8047 // CHECK2-NEXT:    [[B_CASTED:%.*]] = alloca i32, align 4
8048 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 4
8049 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
8050 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
8051 // CHECK2-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
8052 // CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
8053 // CHECK2-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
8054 // CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
8055 // CHECK2-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
8056 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
8057 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
8058 // CHECK2-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
8059 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
8060 // CHECK2-NEXT:    [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
8061 // CHECK2-NEXT:    store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4
8062 // CHECK2-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
8063 // CHECK2-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
8064 // CHECK2-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
8065 // CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[B]], align 4
8066 // CHECK2-NEXT:    store i32 [[TMP4]], ptr [[B_CASTED]], align 4
8067 // CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4
8068 // CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
8069 // CHECK2-NEXT:    [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
8070 // CHECK2-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
8071 // CHECK2-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
8072 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.5, i32 40, i1 false)
8073 // CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
8074 // CHECK2-NEXT:    store ptr [[THIS1]], ptr [[TMP9]], align 4
8075 // CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
8076 // CHECK2-NEXT:    store ptr [[A]], ptr [[TMP10]], align 4
8077 // CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
8078 // CHECK2-NEXT:    store ptr null, ptr [[TMP11]], align 4
8079 // CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
8080 // CHECK2-NEXT:    store i32 [[TMP5]], ptr [[TMP12]], align 4
8081 // CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
8082 // CHECK2-NEXT:    store i32 [[TMP5]], ptr [[TMP13]], align 4
8083 // CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
8084 // CHECK2-NEXT:    store ptr null, ptr [[TMP14]], align 4
8085 // CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
8086 // CHECK2-NEXT:    store i32 2, ptr [[TMP15]], align 4
8087 // CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
8088 // CHECK2-NEXT:    store i32 2, ptr [[TMP16]], align 4
8089 // CHECK2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
8090 // CHECK2-NEXT:    store ptr null, ptr [[TMP17]], align 4
8091 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
8092 // CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TMP18]], align 4
8093 // CHECK2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
8094 // CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TMP19]], align 4
8095 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
8096 // CHECK2-NEXT:    store ptr null, ptr [[TMP20]], align 4
8097 // CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
8098 // CHECK2-NEXT:    store ptr [[VLA]], ptr [[TMP21]], align 4
8099 // CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
8100 // CHECK2-NEXT:    store ptr [[VLA]], ptr [[TMP22]], align 4
8101 // CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
8102 // CHECK2-NEXT:    store i64 [[TMP8]], ptr [[TMP23]], align 4
8103 // CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
8104 // CHECK2-NEXT:    store ptr null, ptr [[TMP24]], align 4
8105 // CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
8106 // CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
8107 // CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
8108 // CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
8109 // CHECK2-NEXT:    store i32 3, ptr [[TMP28]], align 4
8110 // CHECK2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
8111 // CHECK2-NEXT:    store i32 5, ptr [[TMP29]], align 4
8112 // CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
8113 // CHECK2-NEXT:    store ptr [[TMP25]], ptr [[TMP30]], align 4
8114 // CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
8115 // CHECK2-NEXT:    store ptr [[TMP26]], ptr [[TMP31]], align 4
8116 // CHECK2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
8117 // CHECK2-NEXT:    store ptr [[TMP27]], ptr [[TMP32]], align 4
8118 // CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
8119 // CHECK2-NEXT:    store ptr @.offload_maptypes.6, ptr [[TMP33]], align 4
8120 // CHECK2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
8121 // CHECK2-NEXT:    store ptr null, ptr [[TMP34]], align 4
8122 // CHECK2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
8123 // CHECK2-NEXT:    store ptr null, ptr [[TMP35]], align 4
8124 // CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
8125 // CHECK2-NEXT:    store i64 0, ptr [[TMP36]], align 8
8126 // CHECK2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
8127 // CHECK2-NEXT:    store i64 0, ptr [[TMP37]], align 8
8128 // CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
8129 // CHECK2-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP38]], align 4
8130 // CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
8131 // CHECK2-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP39]], align 4
8132 // CHECK2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
8133 // CHECK2-NEXT:    store i32 0, ptr [[TMP40]], align 4
8134 // CHECK2-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, ptr [[KERNEL_ARGS]])
8135 // CHECK2-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
8136 // CHECK2-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
8137 // CHECK2:       omp_offload.failed:
8138 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(ptr [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3]]
8139 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
8140 // CHECK2:       omp_offload.cont:
8141 // CHECK2-NEXT:    [[TMP43:%.*]] = mul nsw i32 1, [[TMP1]]
8142 // CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP43]]
8143 // CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
8144 // CHECK2-NEXT:    [[TMP44:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
8145 // CHECK2-NEXT:    [[CONV:%.*]] = sext i16 [[TMP44]] to i32
8146 // CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[B]], align 4
8147 // CHECK2-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP45]]
8148 // CHECK2-NEXT:    [[TMP46:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
8149 // CHECK2-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP46]])
8150 // CHECK2-NEXT:    ret i32 [[ADD3]]
8151 //
8152 //
8153 // CHECK2-LABEL: define {{[^@]+}}@_ZL7fstatici
8154 // CHECK2-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
8155 // CHECK2-NEXT:  entry:
8156 // CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
8157 // CHECK2-NEXT:    [[A:%.*]] = alloca i32, align 4
8158 // CHECK2-NEXT:    [[AAA:%.*]] = alloca i8, align 1
8159 // CHECK2-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
8160 // CHECK2-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
8161 // CHECK2-NEXT:    [[AAA_CASTED:%.*]] = alloca i32, align 4
8162 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
8163 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
8164 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
8165 // CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
8166 // CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
8167 // CHECK2-NEXT:    store i32 0, ptr [[A]], align 4
8168 // CHECK2-NEXT:    store i8 0, ptr [[AAA]], align 1
8169 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
8170 // CHECK2-NEXT:    store i32 [[TMP0]], ptr [[A_CASTED]], align 4
8171 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
8172 // CHECK2-NEXT:    [[TMP2:%.*]] = load i8, ptr [[AAA]], align 1
8173 // CHECK2-NEXT:    store i8 [[TMP2]], ptr [[AAA_CASTED]], align 1
8174 // CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[AAA_CASTED]], align 4
8175 // CHECK2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
8176 // CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TMP4]], align 4
8177 // CHECK2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
8178 // CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TMP5]], align 4
8179 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
8180 // CHECK2-NEXT:    store ptr null, ptr [[TMP6]], align 4
8181 // CHECK2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
8182 // CHECK2-NEXT:    store i32 [[TMP3]], ptr [[TMP7]], align 4
8183 // CHECK2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
8184 // CHECK2-NEXT:    store i32 [[TMP3]], ptr [[TMP8]], align 4
8185 // CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
8186 // CHECK2-NEXT:    store ptr null, ptr [[TMP9]], align 4
8187 // CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
8188 // CHECK2-NEXT:    store ptr [[B]], ptr [[TMP10]], align 4
8189 // CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
8190 // CHECK2-NEXT:    store ptr [[B]], ptr [[TMP11]], align 4
8191 // CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
8192 // CHECK2-NEXT:    store ptr null, ptr [[TMP12]], align 4
8193 // CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
8194 // CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
8195 // CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
8196 // CHECK2-NEXT:    store i32 3, ptr [[TMP15]], align 4
8197 // CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
8198 // CHECK2-NEXT:    store i32 3, ptr [[TMP16]], align 4
8199 // CHECK2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
8200 // CHECK2-NEXT:    store ptr [[TMP13]], ptr [[TMP17]], align 4
8201 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
8202 // CHECK2-NEXT:    store ptr [[TMP14]], ptr [[TMP18]], align 4
8203 // CHECK2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
8204 // CHECK2-NEXT:    store ptr @.offload_sizes.7, ptr [[TMP19]], align 4
8205 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
8206 // CHECK2-NEXT:    store ptr @.offload_maptypes.8, ptr [[TMP20]], align 4
8207 // CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
8208 // CHECK2-NEXT:    store ptr null, ptr [[TMP21]], align 4
8209 // CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
8210 // CHECK2-NEXT:    store ptr null, ptr [[TMP22]], align 4
8211 // CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
8212 // CHECK2-NEXT:    store i64 0, ptr [[TMP23]], align 8
8213 // CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
8214 // CHECK2-NEXT:    store i64 0, ptr [[TMP24]], align 8
8215 // CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
8216 // CHECK2-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP25]], align 4
8217 // CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
8218 // CHECK2-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4
8219 // CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
8220 // CHECK2-NEXT:    store i32 0, ptr [[TMP27]], align 4
8221 // CHECK2-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, ptr [[KERNEL_ARGS]])
8222 // CHECK2-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
8223 // CHECK2-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
8224 // CHECK2:       omp_offload.failed:
8225 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], ptr [[B]]) #[[ATTR3]]
8226 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
8227 // CHECK2:       omp_offload.cont:
8228 // CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[A]], align 4
8229 // CHECK2-NEXT:    ret i32 [[TMP30]]
8230 //
8231 //
8232 // CHECK2-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
8233 // CHECK2-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
8234 // CHECK2-NEXT:  entry:
8235 // CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
8236 // CHECK2-NEXT:    [[A:%.*]] = alloca i32, align 4
8237 // CHECK2-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
8238 // CHECK2-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
8239 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
8240 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
8241 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
8242 // CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
8243 // CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
8244 // CHECK2-NEXT:    store i32 0, ptr [[A]], align 4
8245 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
8246 // CHECK2-NEXT:    store i32 [[TMP0]], ptr [[A_CASTED]], align 4
8247 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
8248 // CHECK2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
8249 // CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TMP2]], align 4
8250 // CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
8251 // CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TMP3]], align 4
8252 // CHECK2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
8253 // CHECK2-NEXT:    store ptr null, ptr [[TMP4]], align 4
8254 // CHECK2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
8255 // CHECK2-NEXT:    store ptr [[B]], ptr [[TMP5]], align 4
8256 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
8257 // CHECK2-NEXT:    store ptr [[B]], ptr [[TMP6]], align 4
8258 // CHECK2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
8259 // CHECK2-NEXT:    store ptr null, ptr [[TMP7]], align 4
8260 // CHECK2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
8261 // CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
8262 // CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
8263 // CHECK2-NEXT:    store i32 3, ptr [[TMP10]], align 4
8264 // CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
8265 // CHECK2-NEXT:    store i32 2, ptr [[TMP11]], align 4
8266 // CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
8267 // CHECK2-NEXT:    store ptr [[TMP8]], ptr [[TMP12]], align 4
8268 // CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
8269 // CHECK2-NEXT:    store ptr [[TMP9]], ptr [[TMP13]], align 4
8270 // CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
8271 // CHECK2-NEXT:    store ptr @.offload_sizes.9, ptr [[TMP14]], align 4
8272 // CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
8273 // CHECK2-NEXT:    store ptr @.offload_maptypes.10, ptr [[TMP15]], align 4
8274 // CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
8275 // CHECK2-NEXT:    store ptr null, ptr [[TMP16]], align 4
8276 // CHECK2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
8277 // CHECK2-NEXT:    store ptr null, ptr [[TMP17]], align 4
8278 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
8279 // CHECK2-NEXT:    store i64 0, ptr [[TMP18]], align 8
8280 // CHECK2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
8281 // CHECK2-NEXT:    store i64 0, ptr [[TMP19]], align 8
8282 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
8283 // CHECK2-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP20]], align 4
8284 // CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
8285 // CHECK2-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
8286 // CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
8287 // CHECK2-NEXT:    store i32 0, ptr [[TMP22]], align 4
8288 // CHECK2-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, ptr [[KERNEL_ARGS]])
8289 // CHECK2-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
8290 // CHECK2-NEXT:    br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
8291 // CHECK2:       omp_offload.failed:
8292 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], ptr [[B]]) #[[ATTR3]]
8293 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
8294 // CHECK2:       omp_offload.cont:
8295 // CHECK2-NEXT:    [[TMP25:%.*]] = load i32, ptr [[A]], align 4
8296 // CHECK2-NEXT:    ret i32 [[TMP25]]
8297 //
8298 //
8299 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
8300 // CHECK2-SAME: (ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
8301 // CHECK2-NEXT:  entry:
8302 // CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 4
8303 // CHECK2-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
8304 // CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
8305 // CHECK2-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
8306 // CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 4
8307 // CHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
8308 // CHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
8309 // CHECK2-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
8310 // CHECK2-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
8311 // CHECK2-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
8312 // CHECK2-NEXT:    store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
8313 // CHECK2-NEXT:    store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
8314 // CHECK2-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
8315 // CHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
8316 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
8317 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
8318 // CHECK2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
8319 // CHECK2-NEXT:    [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
8320 // CHECK2-NEXT:    store ptr [[TMP4]], ptr [[SAVED_STACK]], align 4
8321 // CHECK2-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
8322 // CHECK2-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
8323 // CHECK2-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
8324 // CHECK2-NEXT:    store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
8325 // CHECK2-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
8326 // CHECK2-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
8327 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i32 [[TMP7]], i1 false)
8328 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
8329 // CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
8330 // CHECK2-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
8331 // CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
8332 // CHECK2-NEXT:    store double [[ADD]], ptr [[A]], align 4
8333 // CHECK2-NEXT:    [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
8334 // CHECK2-NEXT:    [[TMP9:%.*]] = load double, ptr [[A4]], align 4
8335 // CHECK2-NEXT:    [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
8336 // CHECK2-NEXT:    store double [[INC]], ptr [[A4]], align 4
8337 // CHECK2-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
8338 // CHECK2-NEXT:    [[TMP10:%.*]] = mul nsw i32 1, [[TMP2]]
8339 // CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i32 [[TMP10]]
8340 // CHECK2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
8341 // CHECK2-NEXT:    store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
8342 // CHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
8343 // CHECK2-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP11]])
8344 // CHECK2-NEXT:    ret void
8345 //
8346 //
8347 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
8348 // CHECK2-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
8349 // CHECK2-NEXT:  entry:
8350 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
8351 // CHECK2-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
8352 // CHECK2-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
8353 // CHECK2-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
8354 // CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
8355 // CHECK2-NEXT:    store i32 [[AAA]], ptr [[AAA_ADDR]], align 4
8356 // CHECK2-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
8357 // CHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
8358 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
8359 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
8360 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
8361 // CHECK2-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
8362 // CHECK2-NEXT:    [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
8363 // CHECK2-NEXT:    [[CONV:%.*]] = sext i8 [[TMP2]] to i32
8364 // CHECK2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
8365 // CHECK2-NEXT:    [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
8366 // CHECK2-NEXT:    store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
8367 // CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
8368 // CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
8369 // CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
8370 // CHECK2-NEXT:    store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
8371 // CHECK2-NEXT:    ret void
8372 //
8373 //
8374 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
8375 // CHECK2-SAME: (i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
8376 // CHECK2-NEXT:  entry:
8377 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
8378 // CHECK2-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
8379 // CHECK2-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
8380 // CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
8381 // CHECK2-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
8382 // CHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
8383 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
8384 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
8385 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
8386 // CHECK2-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
8387 // CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
8388 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
8389 // CHECK2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
8390 // CHECK2-NEXT:    store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
8391 // CHECK2-NEXT:    ret void
8392 //
8393 //
8394 // CHECK3-LABEL: define {{[^@]+}}@_Z3fooiPd
8395 // CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
8396 // CHECK3-NEXT:  entry:
8397 // CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
8398 // CHECK3-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
8399 // CHECK3-NEXT:    [[A:%.*]] = alloca i32, align 4
8400 // CHECK3-NEXT:    [[AA:%.*]] = alloca i16, align 2
8401 // CHECK3-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
8402 // CHECK3-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
8403 // CHECK3-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
8404 // CHECK3-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
8405 // CHECK3-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
8406 // CHECK3-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
8407 // CHECK3-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
8408 // CHECK3-NEXT:    [[P:%.*]] = alloca ptr, align 64
8409 // CHECK3-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
8410 // CHECK3-NEXT:    [[GA_CASTED:%.*]] = alloca i32, align 4
8411 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
8412 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
8413 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
8414 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
8415 // CHECK3-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
8416 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x ptr], align 4
8417 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x ptr], align 4
8418 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x ptr], align 4
8419 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
8420 // CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
8421 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
8422 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
8423 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
8424 // CHECK3-NEXT:    [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
8425 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
8426 // CHECK3-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
8427 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
8428 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
8429 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
8430 // CHECK3-NEXT:    [[TMP1:%.*]] = call ptr @llvm.stacksave.p0()
8431 // CHECK3-NEXT:    store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4
8432 // CHECK3-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
8433 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
8434 // CHECK3-NEXT:    [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
8435 // CHECK3-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
8436 // CHECK3-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
8437 // CHECK3-NEXT:    store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
8438 // CHECK3-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
8439 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
8440 // CHECK3-NEXT:    store i32 [[TMP4]], ptr [[X]], align 4
8441 // CHECK3-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
8442 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
8443 // CHECK3-NEXT:    store i32 [[TMP5]], ptr [[Y]], align 4
8444 // CHECK3-NEXT:    store ptr [[A]], ptr [[P]], align 64
8445 // CHECK3-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
8446 // CHECK3-NEXT:    store i32 [[TMP6]], ptr [[A_CASTED]], align 4
8447 // CHECK3-NEXT:    [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4
8448 // CHECK3-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[P]], align 64
8449 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr @ga, align 4
8450 // CHECK3-NEXT:    store i32 [[TMP9]], ptr [[GA_CASTED]], align 4
8451 // CHECK3-NEXT:    [[TMP10:%.*]] = load i32, ptr [[GA_CASTED]], align 4
8452 // CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
8453 // CHECK3-NEXT:    store i32 [[TMP7]], ptr [[TMP11]], align 4
8454 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
8455 // CHECK3-NEXT:    store i32 [[TMP7]], ptr [[TMP12]], align 4
8456 // CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
8457 // CHECK3-NEXT:    store ptr null, ptr [[TMP13]], align 4
8458 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
8459 // CHECK3-NEXT:    store ptr [[TMP8]], ptr [[TMP14]], align 4
8460 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
8461 // CHECK3-NEXT:    store ptr [[TMP8]], ptr [[TMP15]], align 4
8462 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
8463 // CHECK3-NEXT:    store ptr null, ptr [[TMP16]], align 4
8464 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
8465 // CHECK3-NEXT:    store i32 [[TMP10]], ptr [[TMP17]], align 4
8466 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
8467 // CHECK3-NEXT:    store i32 [[TMP10]], ptr [[TMP18]], align 4
8468 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
8469 // CHECK3-NEXT:    store ptr null, ptr [[TMP19]], align 4
8470 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
8471 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
8472 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
8473 // CHECK3-NEXT:    store i32 3, ptr [[TMP22]], align 4
8474 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
8475 // CHECK3-NEXT:    store i32 3, ptr [[TMP23]], align 4
8476 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
8477 // CHECK3-NEXT:    store ptr [[TMP20]], ptr [[TMP24]], align 4
8478 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
8479 // CHECK3-NEXT:    store ptr [[TMP21]], ptr [[TMP25]], align 4
8480 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
8481 // CHECK3-NEXT:    store ptr @.offload_sizes, ptr [[TMP26]], align 4
8482 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
8483 // CHECK3-NEXT:    store ptr @.offload_maptypes, ptr [[TMP27]], align 4
8484 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
8485 // CHECK3-NEXT:    store ptr null, ptr [[TMP28]], align 4
8486 // CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
8487 // CHECK3-NEXT:    store ptr null, ptr [[TMP29]], align 4
8488 // CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
8489 // CHECK3-NEXT:    store i64 0, ptr [[TMP30]], align 8
8490 // CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
8491 // CHECK3-NEXT:    store i64 0, ptr [[TMP31]], align 8
8492 // CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
8493 // CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP32]], align 4
8494 // CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
8495 // CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP33]], align 4
8496 // CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
8497 // CHECK3-NEXT:    store i32 0, ptr [[TMP34]], align 4
8498 // CHECK3-NEXT:    [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, ptr [[KERNEL_ARGS]])
8499 // CHECK3-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
8500 // CHECK3-NEXT:    br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
8501 // CHECK3:       omp_offload.failed:
8502 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], ptr [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
8503 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
8504 // CHECK3:       omp_offload.cont:
8505 // CHECK3-NEXT:    [[TMP37:%.*]] = load i16, ptr [[AA]], align 2
8506 // CHECK3-NEXT:    store i16 [[TMP37]], ptr [[AA_CASTED]], align 2
8507 // CHECK3-NEXT:    [[TMP38:%.*]] = load i32, ptr [[AA_CASTED]], align 4
8508 // CHECK3-NEXT:    [[TMP39:%.*]] = mul nuw i32 [[TMP0]], 4
8509 // CHECK3-NEXT:    [[TMP40:%.*]] = sext i32 [[TMP39]] to i64
8510 // CHECK3-NEXT:    [[TMP41:%.*]] = mul nuw i32 5, [[TMP2]]
8511 // CHECK3-NEXT:    [[TMP42:%.*]] = mul nuw i32 [[TMP41]], 8
8512 // CHECK3-NEXT:    [[TMP43:%.*]] = sext i32 [[TMP42]] to i64
8513 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.1, i32 72, i1 false)
8514 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
8515 // CHECK3-NEXT:    store i32 [[TMP38]], ptr [[TMP44]], align 4
8516 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
8517 // CHECK3-NEXT:    store i32 [[TMP38]], ptr [[TMP45]], align 4
8518 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
8519 // CHECK3-NEXT:    store ptr null, ptr [[TMP46]], align 4
8520 // CHECK3-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
8521 // CHECK3-NEXT:    store ptr [[B]], ptr [[TMP47]], align 4
8522 // CHECK3-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
8523 // CHECK3-NEXT:    store ptr [[B]], ptr [[TMP48]], align 4
8524 // CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
8525 // CHECK3-NEXT:    store ptr null, ptr [[TMP49]], align 4
8526 // CHECK3-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
8527 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[TMP50]], align 4
8528 // CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
8529 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[TMP51]], align 4
8530 // CHECK3-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
8531 // CHECK3-NEXT:    store ptr null, ptr [[TMP52]], align 4
8532 // CHECK3-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
8533 // CHECK3-NEXT:    store ptr [[VLA]], ptr [[TMP53]], align 4
8534 // CHECK3-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
8535 // CHECK3-NEXT:    store ptr [[VLA]], ptr [[TMP54]], align 4
8536 // CHECK3-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3
8537 // CHECK3-NEXT:    store i64 [[TMP40]], ptr [[TMP55]], align 4
8538 // CHECK3-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
8539 // CHECK3-NEXT:    store ptr null, ptr [[TMP56]], align 4
8540 // CHECK3-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
8541 // CHECK3-NEXT:    store ptr [[C]], ptr [[TMP57]], align 4
8542 // CHECK3-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
8543 // CHECK3-NEXT:    store ptr [[C]], ptr [[TMP58]], align 4
8544 // CHECK3-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
8545 // CHECK3-NEXT:    store ptr null, ptr [[TMP59]], align 4
8546 // CHECK3-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
8547 // CHECK3-NEXT:    store i32 5, ptr [[TMP60]], align 4
8548 // CHECK3-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
8549 // CHECK3-NEXT:    store i32 5, ptr [[TMP61]], align 4
8550 // CHECK3-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
8551 // CHECK3-NEXT:    store ptr null, ptr [[TMP62]], align 4
8552 // CHECK3-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
8553 // CHECK3-NEXT:    store i32 [[TMP2]], ptr [[TMP63]], align 4
8554 // CHECK3-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
8555 // CHECK3-NEXT:    store i32 [[TMP2]], ptr [[TMP64]], align 4
8556 // CHECK3-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
8557 // CHECK3-NEXT:    store ptr null, ptr [[TMP65]], align 4
8558 // CHECK3-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
8559 // CHECK3-NEXT:    store ptr [[VLA1]], ptr [[TMP66]], align 4
8560 // CHECK3-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
8561 // CHECK3-NEXT:    store ptr [[VLA1]], ptr [[TMP67]], align 4
8562 // CHECK3-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7
8563 // CHECK3-NEXT:    store i64 [[TMP43]], ptr [[TMP68]], align 4
8564 // CHECK3-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
8565 // CHECK3-NEXT:    store ptr null, ptr [[TMP69]], align 4
8566 // CHECK3-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
8567 // CHECK3-NEXT:    store ptr [[D]], ptr [[TMP70]], align 4
8568 // CHECK3-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
8569 // CHECK3-NEXT:    store ptr [[D]], ptr [[TMP71]], align 4
8570 // CHECK3-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
8571 // CHECK3-NEXT:    store ptr null, ptr [[TMP72]], align 4
8572 // CHECK3-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
8573 // CHECK3-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
8574 // CHECK3-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
8575 // CHECK3-NEXT:    [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
8576 // CHECK3-NEXT:    store i32 3, ptr [[TMP76]], align 4
8577 // CHECK3-NEXT:    [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
8578 // CHECK3-NEXT:    store i32 9, ptr [[TMP77]], align 4
8579 // CHECK3-NEXT:    [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
8580 // CHECK3-NEXT:    store ptr [[TMP73]], ptr [[TMP78]], align 4
8581 // CHECK3-NEXT:    [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
8582 // CHECK3-NEXT:    store ptr [[TMP74]], ptr [[TMP79]], align 4
8583 // CHECK3-NEXT:    [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
8584 // CHECK3-NEXT:    store ptr [[TMP75]], ptr [[TMP80]], align 4
8585 // CHECK3-NEXT:    [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
8586 // CHECK3-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP81]], align 4
8587 // CHECK3-NEXT:    [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
8588 // CHECK3-NEXT:    store ptr null, ptr [[TMP82]], align 4
8589 // CHECK3-NEXT:    [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
8590 // CHECK3-NEXT:    store ptr null, ptr [[TMP83]], align 4
8591 // CHECK3-NEXT:    [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
8592 // CHECK3-NEXT:    store i64 0, ptr [[TMP84]], align 8
8593 // CHECK3-NEXT:    [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
8594 // CHECK3-NEXT:    store i64 0, ptr [[TMP85]], align 8
8595 // CHECK3-NEXT:    [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
8596 // CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP86]], align 4
8597 // CHECK3-NEXT:    [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
8598 // CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP87]], align 4
8599 // CHECK3-NEXT:    [[TMP88:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
8600 // CHECK3-NEXT:    store i32 0, ptr [[TMP88]], align 4
8601 // CHECK3-NEXT:    [[TMP89:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, ptr [[KERNEL_ARGS5]])
8602 // CHECK3-NEXT:    [[TMP90:%.*]] = icmp ne i32 [[TMP89]], 0
8603 // CHECK3-NEXT:    br i1 [[TMP90]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
8604 // CHECK3:       omp_offload.failed6:
8605 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP38]], ptr [[B]], i32 [[TMP0]], ptr [[VLA]], ptr [[C]], i32 5, i32 [[TMP2]], ptr [[VLA1]], ptr [[D]]) #[[ATTR3]]
8606 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
8607 // CHECK3:       omp_offload.cont7:
8608 // CHECK3-NEXT:    [[TMP91:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
8609 // CHECK3-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
8610 // CHECK3-NEXT:    store ptr [[TMP91]], ptr [[TMP92]], align 4
8611 // CHECK3-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
8612 // CHECK3-NEXT:    store ptr [[TMP91]], ptr [[TMP93]], align 4
8613 // CHECK3-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
8614 // CHECK3-NEXT:    store ptr null, ptr [[TMP94]], align 4
8615 // CHECK3-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
8616 // CHECK3-NEXT:    store ptr [[E]], ptr [[TMP95]], align 4
8617 // CHECK3-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
8618 // CHECK3-NEXT:    store ptr [[E]], ptr [[TMP96]], align 4
8619 // CHECK3-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
8620 // CHECK3-NEXT:    store ptr null, ptr [[TMP97]], align 4
8621 // CHECK3-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
8622 // CHECK3-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
8623 // CHECK3-NEXT:    [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0
8624 // CHECK3-NEXT:    store i32 3, ptr [[TMP100]], align 4
8625 // CHECK3-NEXT:    [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1
8626 // CHECK3-NEXT:    store i32 2, ptr [[TMP101]], align 4
8627 // CHECK3-NEXT:    [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2
8628 // CHECK3-NEXT:    store ptr [[TMP98]], ptr [[TMP102]], align 4
8629 // CHECK3-NEXT:    [[TMP103:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 3
8630 // CHECK3-NEXT:    store ptr [[TMP99]], ptr [[TMP103]], align 4
8631 // CHECK3-NEXT:    [[TMP104:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 4
8632 // CHECK3-NEXT:    store ptr @.offload_sizes.3, ptr [[TMP104]], align 4
8633 // CHECK3-NEXT:    [[TMP105:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 5
8634 // CHECK3-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP105]], align 4
8635 // CHECK3-NEXT:    [[TMP106:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 6
8636 // CHECK3-NEXT:    store ptr null, ptr [[TMP106]], align 4
8637 // CHECK3-NEXT:    [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 7
8638 // CHECK3-NEXT:    store ptr null, ptr [[TMP107]], align 4
8639 // CHECK3-NEXT:    [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 8
8640 // CHECK3-NEXT:    store i64 0, ptr [[TMP108]], align 8
8641 // CHECK3-NEXT:    [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 9
8642 // CHECK3-NEXT:    store i64 0, ptr [[TMP109]], align 8
8643 // CHECK3-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 10
8644 // CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP110]], align 4
8645 // CHECK3-NEXT:    [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 11
8646 // CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP111]], align 4
8647 // CHECK3-NEXT:    [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 12
8648 // CHECK3-NEXT:    store i32 0, ptr [[TMP112]], align 4
8649 // CHECK3-NEXT:    [[TMP113:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, ptr [[KERNEL_ARGS11]])
8650 // CHECK3-NEXT:    [[TMP114:%.*]] = icmp ne i32 [[TMP113]], 0
8651 // CHECK3-NEXT:    br i1 [[TMP114]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
8652 // CHECK3:       omp_offload.failed12:
8653 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(ptr [[TMP91]], ptr [[E]]) #[[ATTR3]]
8654 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT13]]
8655 // CHECK3:       omp_offload.cont13:
8656 // CHECK3-NEXT:    [[TMP115:%.*]] = load i32, ptr [[A]], align 4
8657 // CHECK3-NEXT:    [[TMP116:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
8658 // CHECK3-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP116]])
8659 // CHECK3-NEXT:    ret i32 [[TMP115]]
8660 //
8661 //
8662 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
8663 // CHECK3-SAME: (i32 noundef [[A:%.*]], ptr noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
8664 // CHECK3-NEXT:  entry:
8665 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
8666 // CHECK3-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 4
8667 // CHECK3-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
8668 // CHECK3-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
8669 // CHECK3-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 4
8670 // CHECK3-NEXT:    store i32 [[GA]], ptr [[GA_ADDR]], align 4
8671 // CHECK3-NEXT:    ret void
8672 //
8673 //
8674 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
8675 // CHECK3-SAME: (i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
8676 // CHECK3-NEXT:  entry:
8677 // CHECK3-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
8678 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
8679 // CHECK3-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
8680 // CHECK3-NEXT:    [[BN_ADDR:%.*]] = alloca ptr, align 4
8681 // CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 4
8682 // CHECK3-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
8683 // CHECK3-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
8684 // CHECK3-NEXT:    [[CN_ADDR:%.*]] = alloca ptr, align 4
8685 // CHECK3-NEXT:    [[D_ADDR:%.*]] = alloca ptr, align 4
8686 // CHECK3-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
8687 // CHECK3-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
8688 // CHECK3-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
8689 // CHECK3-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
8690 // CHECK3-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
8691 // CHECK3-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
8692 // CHECK3-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
8693 // CHECK3-NEXT:    store i32 [[AA]], ptr [[AA_ADDR]], align 4
8694 // CHECK3-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
8695 // CHECK3-NEXT:    store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
8696 // CHECK3-NEXT:    store ptr [[BN]], ptr [[BN_ADDR]], align 4
8697 // CHECK3-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
8698 // CHECK3-NEXT:    store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
8699 // CHECK3-NEXT:    store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4
8700 // CHECK3-NEXT:    store ptr [[CN]], ptr [[CN_ADDR]], align 4
8701 // CHECK3-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 4
8702 // CHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
8703 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
8704 // CHECK3-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4
8705 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
8706 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
8707 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4
8708 // CHECK3-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4
8709 // CHECK3-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4
8710 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i32 40, i1 false)
8711 // CHECK3-NEXT:    [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
8712 // CHECK3-NEXT:    store ptr [[TMP8]], ptr [[SAVED_STACK]], align 4
8713 // CHECK3-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
8714 // CHECK3-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
8715 // CHECK3-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP1]], 4
8716 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i32 [[TMP9]], i1 false)
8717 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i32 400, i1 false)
8718 // CHECK3-NEXT:    [[TMP10:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
8719 // CHECK3-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP10]], align 8
8720 // CHECK3-NEXT:    store i32 [[TMP4]], ptr [[__VLA_EXPR1]], align 4
8721 // CHECK3-NEXT:    store i32 [[TMP5]], ptr [[__VLA_EXPR2]], align 4
8722 // CHECK3-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
8723 // CHECK3-NEXT:    [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 8
8724 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i32 [[TMP12]], i1 false)
8725 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[D9]], ptr align 4 [[TMP7]], i32 12, i1 false)
8726 // CHECK3-NEXT:    [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
8727 // CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP13]] to i32
8728 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
8729 // CHECK3-NEXT:    [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
8730 // CHECK3-NEXT:    store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
8731 // CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i32 0, i32 2
8732 // CHECK3-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
8733 // CHECK3-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i32 3
8734 // CHECK3-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
8735 // CHECK3-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i32 0, i32 1
8736 // CHECK3-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i32 0, i32 2
8737 // CHECK3-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
8738 // CHECK3-NEXT:    [[TMP14:%.*]] = mul nsw i32 1, [[TMP5]]
8739 // CHECK3-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i32 [[TMP14]]
8740 // CHECK3-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3
8741 // CHECK3-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
8742 // CHECK3-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
8743 // CHECK3-NEXT:    store i64 1, ptr [[X]], align 4
8744 // CHECK3-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
8745 // CHECK3-NEXT:    store i8 1, ptr [[Y]], align 4
8746 // CHECK3-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
8747 // CHECK3-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP15]])
8748 // CHECK3-NEXT:    ret void
8749 //
8750 //
8751 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
8752 // CHECK3-SAME: (ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
8753 // CHECK3-NEXT:  entry:
8754 // CHECK3-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
8755 // CHECK3-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 4
8756 // CHECK3-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
8757 // CHECK3-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
8758 // CHECK3-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 4
8759 // CHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
8760 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E1]], ptr align 4 [[TMP0]], i32 8, i1 false)
8761 // CHECK3-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E1]], i32 0, i32 0
8762 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
8763 // CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
8764 // CHECK3-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
8765 // CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0
8766 // CHECK3-NEXT:    store double [[CONV]], ptr [[ARRAYIDX]], align 4
8767 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
8768 // CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0
8769 // CHECK3-NEXT:    [[TMP4:%.*]] = load double, ptr [[ARRAYIDX2]], align 4
8770 // CHECK3-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
8771 // CHECK3-NEXT:    store double [[INC]], ptr [[ARRAYIDX2]], align 4
8772 // CHECK3-NEXT:    ret void
8773 //
8774 //
8775 // CHECK3-LABEL: define {{[^@]+}}@_Z3bariPd
8776 // CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
8777 // CHECK3-NEXT:  entry:
8778 // CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
8779 // CHECK3-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
8780 // CHECK3-NEXT:    [[A:%.*]] = alloca i32, align 4
8781 // CHECK3-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
8782 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
8783 // CHECK3-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
8784 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
8785 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
8786 // CHECK3-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
8787 // CHECK3-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], ptr noundef [[TMP1]])
8788 // CHECK3-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
8789 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
8790 // CHECK3-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
8791 // CHECK3-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
8792 // CHECK3-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
8793 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
8794 // CHECK3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
8795 // CHECK3-NEXT:    store i32 [[ADD2]], ptr [[A]], align 4
8796 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
8797 // CHECK3-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
8798 // CHECK3-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
8799 // CHECK3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
8800 // CHECK3-NEXT:    store i32 [[ADD4]], ptr [[A]], align 4
8801 // CHECK3-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
8802 // CHECK3-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
8803 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
8804 // CHECK3-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
8805 // CHECK3-NEXT:    store i32 [[ADD6]], ptr [[A]], align 4
8806 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
8807 // CHECK3-NEXT:    ret i32 [[TMP9]]
8808 //
8809 //
8810 // CHECK3-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
8811 // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
8812 // CHECK3-NEXT:  entry:
8813 // CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 4
8814 // CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
8815 // CHECK3-NEXT:    [[B:%.*]] = alloca i32, align 4
8816 // CHECK3-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
8817 // CHECK3-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
8818 // CHECK3-NEXT:    [[B_CASTED:%.*]] = alloca i32, align 4
8819 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 4
8820 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
8821 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
8822 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
8823 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
8824 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
8825 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
8826 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
8827 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
8828 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
8829 // CHECK3-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
8830 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
8831 // CHECK3-NEXT:    [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
8832 // CHECK3-NEXT:    store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4
8833 // CHECK3-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
8834 // CHECK3-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
8835 // CHECK3-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
8836 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[B]], align 4
8837 // CHECK3-NEXT:    store i32 [[TMP4]], ptr [[B_CASTED]], align 4
8838 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4
8839 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
8840 // CHECK3-NEXT:    [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
8841 // CHECK3-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
8842 // CHECK3-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
8843 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.5, i32 40, i1 false)
8844 // CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
8845 // CHECK3-NEXT:    store ptr [[THIS1]], ptr [[TMP9]], align 4
8846 // CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
8847 // CHECK3-NEXT:    store ptr [[A]], ptr [[TMP10]], align 4
8848 // CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
8849 // CHECK3-NEXT:    store ptr null, ptr [[TMP11]], align 4
8850 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
8851 // CHECK3-NEXT:    store i32 [[TMP5]], ptr [[TMP12]], align 4
8852 // CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
8853 // CHECK3-NEXT:    store i32 [[TMP5]], ptr [[TMP13]], align 4
8854 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
8855 // CHECK3-NEXT:    store ptr null, ptr [[TMP14]], align 4
8856 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
8857 // CHECK3-NEXT:    store i32 2, ptr [[TMP15]], align 4
8858 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
8859 // CHECK3-NEXT:    store i32 2, ptr [[TMP16]], align 4
8860 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
8861 // CHECK3-NEXT:    store ptr null, ptr [[TMP17]], align 4
8862 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
8863 // CHECK3-NEXT:    store i32 [[TMP1]], ptr [[TMP18]], align 4
8864 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
8865 // CHECK3-NEXT:    store i32 [[TMP1]], ptr [[TMP19]], align 4
8866 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
8867 // CHECK3-NEXT:    store ptr null, ptr [[TMP20]], align 4
8868 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
8869 // CHECK3-NEXT:    store ptr [[VLA]], ptr [[TMP21]], align 4
8870 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
8871 // CHECK3-NEXT:    store ptr [[VLA]], ptr [[TMP22]], align 4
8872 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
8873 // CHECK3-NEXT:    store i64 [[TMP8]], ptr [[TMP23]], align 4
8874 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
8875 // CHECK3-NEXT:    store ptr null, ptr [[TMP24]], align 4
8876 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
8877 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
8878 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
8879 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
8880 // CHECK3-NEXT:    store i32 3, ptr [[TMP28]], align 4
8881 // CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
8882 // CHECK3-NEXT:    store i32 5, ptr [[TMP29]], align 4
8883 // CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
8884 // CHECK3-NEXT:    store ptr [[TMP25]], ptr [[TMP30]], align 4
8885 // CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
8886 // CHECK3-NEXT:    store ptr [[TMP26]], ptr [[TMP31]], align 4
8887 // CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
8888 // CHECK3-NEXT:    store ptr [[TMP27]], ptr [[TMP32]], align 4
8889 // CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
8890 // CHECK3-NEXT:    store ptr @.offload_maptypes.6, ptr [[TMP33]], align 4
8891 // CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
8892 // CHECK3-NEXT:    store ptr null, ptr [[TMP34]], align 4
8893 // CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
8894 // CHECK3-NEXT:    store ptr null, ptr [[TMP35]], align 4
8895 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
8896 // CHECK3-NEXT:    store i64 0, ptr [[TMP36]], align 8
8897 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
8898 // CHECK3-NEXT:    store i64 0, ptr [[TMP37]], align 8
8899 // CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
8900 // CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP38]], align 4
8901 // CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
8902 // CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP39]], align 4
8903 // CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
8904 // CHECK3-NEXT:    store i32 0, ptr [[TMP40]], align 4
8905 // CHECK3-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, ptr [[KERNEL_ARGS]])
8906 // CHECK3-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
8907 // CHECK3-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
8908 // CHECK3:       omp_offload.failed:
8909 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(ptr [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3]]
8910 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
8911 // CHECK3:       omp_offload.cont:
8912 // CHECK3-NEXT:    [[TMP43:%.*]] = mul nsw i32 1, [[TMP1]]
8913 // CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP43]]
8914 // CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
8915 // CHECK3-NEXT:    [[TMP44:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
8916 // CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP44]] to i32
8917 // CHECK3-NEXT:    [[TMP45:%.*]] = load i32, ptr [[B]], align 4
8918 // CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP45]]
8919 // CHECK3-NEXT:    [[TMP46:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
8920 // CHECK3-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP46]])
8921 // CHECK3-NEXT:    ret i32 [[ADD3]]
8922 //
8923 //
8924 // CHECK3-LABEL: define {{[^@]+}}@_ZL7fstatici
8925 // CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
8926 // CHECK3-NEXT:  entry:
8927 // CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
8928 // CHECK3-NEXT:    [[A:%.*]] = alloca i32, align 4
8929 // CHECK3-NEXT:    [[AAA:%.*]] = alloca i8, align 1
8930 // CHECK3-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
8931 // CHECK3-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
8932 // CHECK3-NEXT:    [[AAA_CASTED:%.*]] = alloca i32, align 4
8933 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
8934 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
8935 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
8936 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
8937 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
8938 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
8939 // CHECK3-NEXT:    store i8 0, ptr [[AAA]], align 1
8940 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
8941 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[A_CASTED]], align 4
8942 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
8943 // CHECK3-NEXT:    [[TMP2:%.*]] = load i8, ptr [[AAA]], align 1
8944 // CHECK3-NEXT:    store i8 [[TMP2]], ptr [[AAA_CASTED]], align 1
8945 // CHECK3-NEXT:    [[TMP3:%.*]] = load i32, ptr [[AAA_CASTED]], align 4
8946 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
8947 // CHECK3-NEXT:    store i32 [[TMP1]], ptr [[TMP4]], align 4
8948 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
8949 // CHECK3-NEXT:    store i32 [[TMP1]], ptr [[TMP5]], align 4
8950 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
8951 // CHECK3-NEXT:    store ptr null, ptr [[TMP6]], align 4
8952 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
8953 // CHECK3-NEXT:    store i32 [[TMP3]], ptr [[TMP7]], align 4
8954 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
8955 // CHECK3-NEXT:    store i32 [[TMP3]], ptr [[TMP8]], align 4
8956 // CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
8957 // CHECK3-NEXT:    store ptr null, ptr [[TMP9]], align 4
8958 // CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
8959 // CHECK3-NEXT:    store ptr [[B]], ptr [[TMP10]], align 4
8960 // CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
8961 // CHECK3-NEXT:    store ptr [[B]], ptr [[TMP11]], align 4
8962 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
8963 // CHECK3-NEXT:    store ptr null, ptr [[TMP12]], align 4
8964 // CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
8965 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
8966 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
8967 // CHECK3-NEXT:    store i32 3, ptr [[TMP15]], align 4
8968 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
8969 // CHECK3-NEXT:    store i32 3, ptr [[TMP16]], align 4
8970 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
8971 // CHECK3-NEXT:    store ptr [[TMP13]], ptr [[TMP17]], align 4
8972 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
8973 // CHECK3-NEXT:    store ptr [[TMP14]], ptr [[TMP18]], align 4
8974 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
8975 // CHECK3-NEXT:    store ptr @.offload_sizes.7, ptr [[TMP19]], align 4
8976 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
8977 // CHECK3-NEXT:    store ptr @.offload_maptypes.8, ptr [[TMP20]], align 4
8978 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
8979 // CHECK3-NEXT:    store ptr null, ptr [[TMP21]], align 4
8980 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
8981 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
8982 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
8983 // CHECK3-NEXT:    store i64 0, ptr [[TMP23]], align 8
8984 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
8985 // CHECK3-NEXT:    store i64 0, ptr [[TMP24]], align 8
8986 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
8987 // CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP25]], align 4
8988 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
8989 // CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4
8990 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
8991 // CHECK3-NEXT:    store i32 0, ptr [[TMP27]], align 4
8992 // CHECK3-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, ptr [[KERNEL_ARGS]])
8993 // CHECK3-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
8994 // CHECK3-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
8995 // CHECK3:       omp_offload.failed:
8996 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], ptr [[B]]) #[[ATTR3]]
8997 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
8998 // CHECK3:       omp_offload.cont:
8999 // CHECK3-NEXT:    [[TMP30:%.*]] = load i32, ptr [[A]], align 4
9000 // CHECK3-NEXT:    ret i32 [[TMP30]]
9001 //
9002 //
9003 // CHECK3-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
9004 // CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
9005 // CHECK3-NEXT:  entry:
9006 // CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9007 // CHECK3-NEXT:    [[A:%.*]] = alloca i32, align 4
9008 // CHECK3-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
9009 // CHECK3-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
9010 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
9011 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
9012 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
9013 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
9014 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9015 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
9016 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
9017 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[A_CASTED]], align 4
9018 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
9019 // CHECK3-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
9020 // CHECK3-NEXT:    store i32 [[TMP1]], ptr [[TMP2]], align 4
9021 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
9022 // CHECK3-NEXT:    store i32 [[TMP1]], ptr [[TMP3]], align 4
9023 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
9024 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
9025 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
9026 // CHECK3-NEXT:    store ptr [[B]], ptr [[TMP5]], align 4
9027 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
9028 // CHECK3-NEXT:    store ptr [[B]], ptr [[TMP6]], align 4
9029 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
9030 // CHECK3-NEXT:    store ptr null, ptr [[TMP7]], align 4
9031 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
9032 // CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
9033 // CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
9034 // CHECK3-NEXT:    store i32 3, ptr [[TMP10]], align 4
9035 // CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
9036 // CHECK3-NEXT:    store i32 2, ptr [[TMP11]], align 4
9037 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
9038 // CHECK3-NEXT:    store ptr [[TMP8]], ptr [[TMP12]], align 4
9039 // CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
9040 // CHECK3-NEXT:    store ptr [[TMP9]], ptr [[TMP13]], align 4
9041 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
9042 // CHECK3-NEXT:    store ptr @.offload_sizes.9, ptr [[TMP14]], align 4
9043 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
9044 // CHECK3-NEXT:    store ptr @.offload_maptypes.10, ptr [[TMP15]], align 4
9045 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
9046 // CHECK3-NEXT:    store ptr null, ptr [[TMP16]], align 4
9047 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
9048 // CHECK3-NEXT:    store ptr null, ptr [[TMP17]], align 4
9049 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
9050 // CHECK3-NEXT:    store i64 0, ptr [[TMP18]], align 8
9051 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
9052 // CHECK3-NEXT:    store i64 0, ptr [[TMP19]], align 8
9053 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
9054 // CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP20]], align 4
9055 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
9056 // CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
9057 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
9058 // CHECK3-NEXT:    store i32 0, ptr [[TMP22]], align 4
9059 // CHECK3-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, ptr [[KERNEL_ARGS]])
9060 // CHECK3-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
9061 // CHECK3-NEXT:    br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
9062 // CHECK3:       omp_offload.failed:
9063 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], ptr [[B]]) #[[ATTR3]]
9064 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
9065 // CHECK3:       omp_offload.cont:
9066 // CHECK3-NEXT:    [[TMP25:%.*]] = load i32, ptr [[A]], align 4
9067 // CHECK3-NEXT:    ret i32 [[TMP25]]
9068 //
9069 //
9070 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
9071 // CHECK3-SAME: (ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
9072 // CHECK3-NEXT:  entry:
9073 // CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 4
9074 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
9075 // CHECK3-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
9076 // CHECK3-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
9077 // CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 4
9078 // CHECK3-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
9079 // CHECK3-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
9080 // CHECK3-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
9081 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
9082 // CHECK3-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
9083 // CHECK3-NEXT:    store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
9084 // CHECK3-NEXT:    store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
9085 // CHECK3-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
9086 // CHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
9087 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
9088 // CHECK3-NEXT:    [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
9089 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
9090 // CHECK3-NEXT:    [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
9091 // CHECK3-NEXT:    store ptr [[TMP4]], ptr [[SAVED_STACK]], align 4
9092 // CHECK3-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
9093 // CHECK3-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
9094 // CHECK3-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
9095 // CHECK3-NEXT:    store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
9096 // CHECK3-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
9097 // CHECK3-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
9098 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i32 [[TMP7]], i1 false)
9099 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
9100 // CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
9101 // CHECK3-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
9102 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
9103 // CHECK3-NEXT:    store double [[ADD]], ptr [[A]], align 4
9104 // CHECK3-NEXT:    [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
9105 // CHECK3-NEXT:    [[TMP9:%.*]] = load double, ptr [[A4]], align 4
9106 // CHECK3-NEXT:    [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
9107 // CHECK3-NEXT:    store double [[INC]], ptr [[A4]], align 4
9108 // CHECK3-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
9109 // CHECK3-NEXT:    [[TMP10:%.*]] = mul nsw i32 1, [[TMP2]]
9110 // CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i32 [[TMP10]]
9111 // CHECK3-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
9112 // CHECK3-NEXT:    store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
9113 // CHECK3-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
9114 // CHECK3-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP11]])
9115 // CHECK3-NEXT:    ret void
9116 //
9117 //
9118 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
9119 // CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
9120 // CHECK3-NEXT:  entry:
9121 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
9122 // CHECK3-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
9123 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
9124 // CHECK3-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
9125 // CHECK3-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
9126 // CHECK3-NEXT:    store i32 [[AAA]], ptr [[AAA_ADDR]], align 4
9127 // CHECK3-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
9128 // CHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
9129 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
9130 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
9131 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
9132 // CHECK3-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
9133 // CHECK3-NEXT:    [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
9134 // CHECK3-NEXT:    [[CONV:%.*]] = sext i8 [[TMP2]] to i32
9135 // CHECK3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
9136 // CHECK3-NEXT:    [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
9137 // CHECK3-NEXT:    store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
9138 // CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
9139 // CHECK3-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
9140 // CHECK3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
9141 // CHECK3-NEXT:    store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
9142 // CHECK3-NEXT:    ret void
9143 //
9144 //
9145 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
9146 // CHECK3-SAME: (i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
9147 // CHECK3-NEXT:  entry:
9148 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
9149 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
9150 // CHECK3-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
9151 // CHECK3-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
9152 // CHECK3-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
9153 // CHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
9154 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
9155 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
9156 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
9157 // CHECK3-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
9158 // CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
9159 // CHECK3-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
9160 // CHECK3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
9161 // CHECK3-NEXT:    store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
9162 // CHECK3-NEXT:    ret void
9163 //
9164 //
9165 // SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z3fooiPd
9166 // SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
9167 // SIMD-ONLY0-NEXT:  entry:
9168 // SIMD-ONLY0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9169 // SIMD-ONLY0-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
9170 // SIMD-ONLY0-NEXT:    [[A:%.*]] = alloca i32, align 4
9171 // SIMD-ONLY0-NEXT:    [[AA:%.*]] = alloca i16, align 2
9172 // SIMD-ONLY0-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
9173 // SIMD-ONLY0-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
9174 // SIMD-ONLY0-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
9175 // SIMD-ONLY0-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
9176 // SIMD-ONLY0-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
9177 // SIMD-ONLY0-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
9178 // SIMD-ONLY0-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
9179 // SIMD-ONLY0-NEXT:    [[P:%.*]] = alloca ptr, align 64
9180 // SIMD-ONLY0-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9181 // SIMD-ONLY0-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
9182 // SIMD-ONLY0-NEXT:    store i32 0, ptr [[A]], align 4
9183 // SIMD-ONLY0-NEXT:    store i16 0, ptr [[AA]], align 2
9184 // SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
9185 // SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
9186 // SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
9187 // SIMD-ONLY0-NEXT:    store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
9188 // SIMD-ONLY0-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
9189 // SIMD-ONLY0-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
9190 // SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
9191 // SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
9192 // SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
9193 // SIMD-ONLY0-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
9194 // SIMD-ONLY0-NEXT:    store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
9195 // SIMD-ONLY0-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
9196 // SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
9197 // SIMD-ONLY0-NEXT:    store i32 [[TMP6]], ptr [[X]], align 4
9198 // SIMD-ONLY0-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
9199 // SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
9200 // SIMD-ONLY0-NEXT:    store i32 [[TMP7]], ptr [[Y]], align 4
9201 // SIMD-ONLY0-NEXT:    store ptr [[A]], ptr [[P]], align 64
9202 // SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i16, ptr [[AA]], align 2
9203 // SIMD-ONLY0-NEXT:    [[CONV:%.*]] = sext i16 [[TMP8]] to i32
9204 // SIMD-ONLY0-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
9205 // SIMD-ONLY0-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
9206 // SIMD-ONLY0-NEXT:    store i16 [[CONV2]], ptr [[AA]], align 2
9207 // SIMD-ONLY0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i64 0, i64 2
9208 // SIMD-ONLY0-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
9209 // SIMD-ONLY0-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 3
9210 // SIMD-ONLY0-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
9211 // SIMD-ONLY0-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i64 0, i64 1
9212 // SIMD-ONLY0-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i64 0, i64 2
9213 // SIMD-ONLY0-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
9214 // SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
9215 // SIMD-ONLY0-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i64 [[TMP9]]
9216 // SIMD-ONLY0-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i64 3
9217 // SIMD-ONLY0-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
9218 // SIMD-ONLY0-NEXT:    [[X8:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
9219 // SIMD-ONLY0-NEXT:    store i64 1, ptr [[X8]], align 8
9220 // SIMD-ONLY0-NEXT:    [[Y9:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
9221 // SIMD-ONLY0-NEXT:    store i8 1, ptr [[Y9]], align 8
9222 // SIMD-ONLY0-NEXT:    [[X10:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
9223 // SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i32, ptr [[X10]], align 4
9224 // SIMD-ONLY0-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
9225 // SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
9226 // SIMD-ONLY0-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP11]], i64 0
9227 // SIMD-ONLY0-NEXT:    store double [[CONV11]], ptr [[ARRAYIDX12]], align 8
9228 // SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
9229 // SIMD-ONLY0-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 0
9230 // SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load double, ptr [[ARRAYIDX13]], align 8
9231 // SIMD-ONLY0-NEXT:    [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
9232 // SIMD-ONLY0-NEXT:    store double [[INC]], ptr [[ARRAYIDX13]], align 8
9233 // SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i32, ptr [[A]], align 4
9234 // SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
9235 // SIMD-ONLY0-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP15]])
9236 // SIMD-ONLY0-NEXT:    ret i32 [[TMP14]]
9237 //
9238 //
9239 // SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z3bariPd
9240 // SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
9241 // SIMD-ONLY0-NEXT:  entry:
9242 // SIMD-ONLY0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9243 // SIMD-ONLY0-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
9244 // SIMD-ONLY0-NEXT:    [[A:%.*]] = alloca i32, align 4
9245 // SIMD-ONLY0-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
9246 // SIMD-ONLY0-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9247 // SIMD-ONLY0-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
9248 // SIMD-ONLY0-NEXT:    store i32 0, ptr [[A]], align 4
9249 // SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
9250 // SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
9251 // SIMD-ONLY0-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], ptr noundef [[TMP1]])
9252 // SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
9253 // SIMD-ONLY0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
9254 // SIMD-ONLY0-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
9255 // SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
9256 // SIMD-ONLY0-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
9257 // SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
9258 // SIMD-ONLY0-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
9259 // SIMD-ONLY0-NEXT:    store i32 [[ADD2]], ptr [[A]], align 4
9260 // SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
9261 // SIMD-ONLY0-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
9262 // SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
9263 // SIMD-ONLY0-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
9264 // SIMD-ONLY0-NEXT:    store i32 [[ADD4]], ptr [[A]], align 4
9265 // SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
9266 // SIMD-ONLY0-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
9267 // SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
9268 // SIMD-ONLY0-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
9269 // SIMD-ONLY0-NEXT:    store i32 [[ADD6]], ptr [[A]], align 4
9270 // SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
9271 // SIMD-ONLY0-NEXT:    ret i32 [[TMP9]]
9272 //
9273 //
9274 // SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
9275 // SIMD-ONLY0-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
9276 // SIMD-ONLY0-NEXT:  entry:
9277 // SIMD-ONLY0-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
9278 // SIMD-ONLY0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9279 // SIMD-ONLY0-NEXT:    [[B:%.*]] = alloca i32, align 4
9280 // SIMD-ONLY0-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
9281 // SIMD-ONLY0-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
9282 // SIMD-ONLY0-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
9283 // SIMD-ONLY0-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9284 // SIMD-ONLY0-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
9285 // SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
9286 // SIMD-ONLY0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
9287 // SIMD-ONLY0-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
9288 // SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
9289 // SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
9290 // SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = call ptr @llvm.stacksave.p0()
9291 // SIMD-ONLY0-NEXT:    store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8
9292 // SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
9293 // SIMD-ONLY0-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
9294 // SIMD-ONLY0-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
9295 // SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i32, ptr [[B]], align 4
9296 // SIMD-ONLY0-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
9297 // SIMD-ONLY0-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
9298 // SIMD-ONLY0-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
9299 // SIMD-ONLY0-NEXT:    store double [[ADD2]], ptr [[A]], align 8
9300 // SIMD-ONLY0-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
9301 // SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load double, ptr [[A3]], align 8
9302 // SIMD-ONLY0-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
9303 // SIMD-ONLY0-NEXT:    store double [[INC]], ptr [[A3]], align 8
9304 // SIMD-ONLY0-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
9305 // SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
9306 // SIMD-ONLY0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP7]]
9307 // SIMD-ONLY0-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
9308 // SIMD-ONLY0-NEXT:    store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
9309 // SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
9310 // SIMD-ONLY0-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP8]]
9311 // SIMD-ONLY0-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i64 1
9312 // SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
9313 // SIMD-ONLY0-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
9314 // SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i32, ptr [[B]], align 4
9315 // SIMD-ONLY0-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
9316 // SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
9317 // SIMD-ONLY0-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP11]])
9318 // SIMD-ONLY0-NEXT:    ret i32 [[ADD9]]
9319 //
9320 //
9321 // SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZL7fstatici
9322 // SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
9323 // SIMD-ONLY0-NEXT:  entry:
9324 // SIMD-ONLY0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9325 // SIMD-ONLY0-NEXT:    [[A:%.*]] = alloca i32, align 4
9326 // SIMD-ONLY0-NEXT:    [[AAA:%.*]] = alloca i8, align 1
9327 // SIMD-ONLY0-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
9328 // SIMD-ONLY0-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9329 // SIMD-ONLY0-NEXT:    store i32 0, ptr [[A]], align 4
9330 // SIMD-ONLY0-NEXT:    store i8 0, ptr [[AAA]], align 1
9331 // SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
9332 // SIMD-ONLY0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
9333 // SIMD-ONLY0-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
9334 // SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
9335 // SIMD-ONLY0-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
9336 // SIMD-ONLY0-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
9337 // SIMD-ONLY0-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
9338 // SIMD-ONLY0-NEXT:    store i8 [[CONV2]], ptr [[AAA]], align 1
9339 // SIMD-ONLY0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
9340 // SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
9341 // SIMD-ONLY0-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
9342 // SIMD-ONLY0-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
9343 // SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
9344 // SIMD-ONLY0-NEXT:    ret i32 [[TMP3]]
9345 //
9346 //
9347 // SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
9348 // SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
9349 // SIMD-ONLY0-NEXT:  entry:
9350 // SIMD-ONLY0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9351 // SIMD-ONLY0-NEXT:    [[A:%.*]] = alloca i32, align 4
9352 // SIMD-ONLY0-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
9353 // SIMD-ONLY0-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9354 // SIMD-ONLY0-NEXT:    store i32 0, ptr [[A]], align 4
9355 // SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
9356 // SIMD-ONLY0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
9357 // SIMD-ONLY0-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
9358 // SIMD-ONLY0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
9359 // SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
9360 // SIMD-ONLY0-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
9361 // SIMD-ONLY0-NEXT:    store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
9362 // SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
9363 // SIMD-ONLY0-NEXT:    ret i32 [[TMP2]]
9364 //
9365 //
9366 // SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3fooiPd
9367 // SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
9368 // SIMD-ONLY01-NEXT:  entry:
9369 // SIMD-ONLY01-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9370 // SIMD-ONLY01-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
9371 // SIMD-ONLY01-NEXT:    [[A:%.*]] = alloca i32, align 4
9372 // SIMD-ONLY01-NEXT:    [[AA:%.*]] = alloca i16, align 2
9373 // SIMD-ONLY01-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
9374 // SIMD-ONLY01-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
9375 // SIMD-ONLY01-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
9376 // SIMD-ONLY01-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
9377 // SIMD-ONLY01-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
9378 // SIMD-ONLY01-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
9379 // SIMD-ONLY01-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
9380 // SIMD-ONLY01-NEXT:    [[P:%.*]] = alloca ptr, align 64
9381 // SIMD-ONLY01-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9382 // SIMD-ONLY01-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
9383 // SIMD-ONLY01-NEXT:    store i32 0, ptr [[A]], align 4
9384 // SIMD-ONLY01-NEXT:    store i16 0, ptr [[AA]], align 2
9385 // SIMD-ONLY01-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
9386 // SIMD-ONLY01-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
9387 // SIMD-ONLY01-NEXT:    [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
9388 // SIMD-ONLY01-NEXT:    store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
9389 // SIMD-ONLY01-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
9390 // SIMD-ONLY01-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
9391 // SIMD-ONLY01-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
9392 // SIMD-ONLY01-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
9393 // SIMD-ONLY01-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
9394 // SIMD-ONLY01-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
9395 // SIMD-ONLY01-NEXT:    store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
9396 // SIMD-ONLY01-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
9397 // SIMD-ONLY01-NEXT:    [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
9398 // SIMD-ONLY01-NEXT:    store i32 [[TMP6]], ptr [[X]], align 4
9399 // SIMD-ONLY01-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
9400 // SIMD-ONLY01-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
9401 // SIMD-ONLY01-NEXT:    store i32 [[TMP7]], ptr [[Y]], align 4
9402 // SIMD-ONLY01-NEXT:    store ptr [[A]], ptr [[P]], align 64
9403 // SIMD-ONLY01-NEXT:    [[TMP8:%.*]] = load i16, ptr [[AA]], align 2
9404 // SIMD-ONLY01-NEXT:    [[CONV:%.*]] = sext i16 [[TMP8]] to i32
9405 // SIMD-ONLY01-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
9406 // SIMD-ONLY01-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
9407 // SIMD-ONLY01-NEXT:    store i16 [[CONV2]], ptr [[AA]], align 2
9408 // SIMD-ONLY01-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i64 0, i64 2
9409 // SIMD-ONLY01-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
9410 // SIMD-ONLY01-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 3
9411 // SIMD-ONLY01-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
9412 // SIMD-ONLY01-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i64 0, i64 1
9413 // SIMD-ONLY01-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i64 0, i64 2
9414 // SIMD-ONLY01-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
9415 // SIMD-ONLY01-NEXT:    [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
9416 // SIMD-ONLY01-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i64 [[TMP9]]
9417 // SIMD-ONLY01-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i64 3
9418 // SIMD-ONLY01-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
9419 // SIMD-ONLY01-NEXT:    [[X8:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
9420 // SIMD-ONLY01-NEXT:    store i64 1, ptr [[X8]], align 8
9421 // SIMD-ONLY01-NEXT:    [[Y9:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
9422 // SIMD-ONLY01-NEXT:    store i8 1, ptr [[Y9]], align 8
9423 // SIMD-ONLY01-NEXT:    [[X10:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
9424 // SIMD-ONLY01-NEXT:    [[TMP10:%.*]] = load i32, ptr [[X10]], align 4
9425 // SIMD-ONLY01-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
9426 // SIMD-ONLY01-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
9427 // SIMD-ONLY01-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP11]], i64 0
9428 // SIMD-ONLY01-NEXT:    store double [[CONV11]], ptr [[ARRAYIDX12]], align 8
9429 // SIMD-ONLY01-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
9430 // SIMD-ONLY01-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 0
9431 // SIMD-ONLY01-NEXT:    [[TMP13:%.*]] = load double, ptr [[ARRAYIDX13]], align 8
9432 // SIMD-ONLY01-NEXT:    [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
9433 // SIMD-ONLY01-NEXT:    store double [[INC]], ptr [[ARRAYIDX13]], align 8
9434 // SIMD-ONLY01-NEXT:    [[TMP14:%.*]] = load i32, ptr [[A]], align 4
9435 // SIMD-ONLY01-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
9436 // SIMD-ONLY01-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP15]])
9437 // SIMD-ONLY01-NEXT:    ret i32 [[TMP14]]
9438 //
9439 //
9440 // SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3bariPd
9441 // SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
9442 // SIMD-ONLY01-NEXT:  entry:
9443 // SIMD-ONLY01-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9444 // SIMD-ONLY01-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
9445 // SIMD-ONLY01-NEXT:    [[A:%.*]] = alloca i32, align 4
9446 // SIMD-ONLY01-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
9447 // SIMD-ONLY01-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9448 // SIMD-ONLY01-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
9449 // SIMD-ONLY01-NEXT:    store i32 0, ptr [[A]], align 4
9450 // SIMD-ONLY01-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
9451 // SIMD-ONLY01-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
9452 // SIMD-ONLY01-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], ptr noundef [[TMP1]])
9453 // SIMD-ONLY01-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
9454 // SIMD-ONLY01-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
9455 // SIMD-ONLY01-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
9456 // SIMD-ONLY01-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
9457 // SIMD-ONLY01-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
9458 // SIMD-ONLY01-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
9459 // SIMD-ONLY01-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
9460 // SIMD-ONLY01-NEXT:    store i32 [[ADD2]], ptr [[A]], align 4
9461 // SIMD-ONLY01-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
9462 // SIMD-ONLY01-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
9463 // SIMD-ONLY01-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
9464 // SIMD-ONLY01-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
9465 // SIMD-ONLY01-NEXT:    store i32 [[ADD4]], ptr [[A]], align 4
9466 // SIMD-ONLY01-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
9467 // SIMD-ONLY01-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
9468 // SIMD-ONLY01-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
9469 // SIMD-ONLY01-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
9470 // SIMD-ONLY01-NEXT:    store i32 [[ADD6]], ptr [[A]], align 4
9471 // SIMD-ONLY01-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
9472 // SIMD-ONLY01-NEXT:    ret i32 [[TMP9]]
9473 //
9474 //
9475 // SIMD-ONLY01-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
9476 // SIMD-ONLY01-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
9477 // SIMD-ONLY01-NEXT:  entry:
9478 // SIMD-ONLY01-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
9479 // SIMD-ONLY01-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9480 // SIMD-ONLY01-NEXT:    [[B:%.*]] = alloca i32, align 4
9481 // SIMD-ONLY01-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
9482 // SIMD-ONLY01-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
9483 // SIMD-ONLY01-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
9484 // SIMD-ONLY01-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9485 // SIMD-ONLY01-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
9486 // SIMD-ONLY01-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
9487 // SIMD-ONLY01-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
9488 // SIMD-ONLY01-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
9489 // SIMD-ONLY01-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
9490 // SIMD-ONLY01-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
9491 // SIMD-ONLY01-NEXT:    [[TMP3:%.*]] = call ptr @llvm.stacksave.p0()
9492 // SIMD-ONLY01-NEXT:    store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8
9493 // SIMD-ONLY01-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
9494 // SIMD-ONLY01-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
9495 // SIMD-ONLY01-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
9496 // SIMD-ONLY01-NEXT:    [[TMP5:%.*]] = load i32, ptr [[B]], align 4
9497 // SIMD-ONLY01-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
9498 // SIMD-ONLY01-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
9499 // SIMD-ONLY01-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
9500 // SIMD-ONLY01-NEXT:    store double [[ADD2]], ptr [[A]], align 8
9501 // SIMD-ONLY01-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
9502 // SIMD-ONLY01-NEXT:    [[TMP6:%.*]] = load double, ptr [[A3]], align 8
9503 // SIMD-ONLY01-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
9504 // SIMD-ONLY01-NEXT:    store double [[INC]], ptr [[A3]], align 8
9505 // SIMD-ONLY01-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
9506 // SIMD-ONLY01-NEXT:    [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
9507 // SIMD-ONLY01-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP7]]
9508 // SIMD-ONLY01-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
9509 // SIMD-ONLY01-NEXT:    store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
9510 // SIMD-ONLY01-NEXT:    [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
9511 // SIMD-ONLY01-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP8]]
9512 // SIMD-ONLY01-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i64 1
9513 // SIMD-ONLY01-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
9514 // SIMD-ONLY01-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
9515 // SIMD-ONLY01-NEXT:    [[TMP10:%.*]] = load i32, ptr [[B]], align 4
9516 // SIMD-ONLY01-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
9517 // SIMD-ONLY01-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
9518 // SIMD-ONLY01-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP11]])
9519 // SIMD-ONLY01-NEXT:    ret i32 [[ADD9]]
9520 //
9521 //
9522 // SIMD-ONLY01-LABEL: define {{[^@]+}}@_ZL7fstatici
9523 // SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
9524 // SIMD-ONLY01-NEXT:  entry:
9525 // SIMD-ONLY01-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9526 // SIMD-ONLY01-NEXT:    [[A:%.*]] = alloca i32, align 4
9527 // SIMD-ONLY01-NEXT:    [[AAA:%.*]] = alloca i8, align 1
9528 // SIMD-ONLY01-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
9529 // SIMD-ONLY01-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9530 // SIMD-ONLY01-NEXT:    store i32 0, ptr [[A]], align 4
9531 // SIMD-ONLY01-NEXT:    store i8 0, ptr [[AAA]], align 1
9532 // SIMD-ONLY01-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
9533 // SIMD-ONLY01-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
9534 // SIMD-ONLY01-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
9535 // SIMD-ONLY01-NEXT:    [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
9536 // SIMD-ONLY01-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
9537 // SIMD-ONLY01-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
9538 // SIMD-ONLY01-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
9539 // SIMD-ONLY01-NEXT:    store i8 [[CONV2]], ptr [[AAA]], align 1
9540 // SIMD-ONLY01-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
9541 // SIMD-ONLY01-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
9542 // SIMD-ONLY01-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
9543 // SIMD-ONLY01-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
9544 // SIMD-ONLY01-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
9545 // SIMD-ONLY01-NEXT:    ret i32 [[TMP3]]
9546 //
9547 //
9548 // SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
9549 // SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
9550 // SIMD-ONLY01-NEXT:  entry:
9551 // SIMD-ONLY01-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9552 // SIMD-ONLY01-NEXT:    [[A:%.*]] = alloca i32, align 4
9553 // SIMD-ONLY01-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
9554 // SIMD-ONLY01-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9555 // SIMD-ONLY01-NEXT:    store i32 0, ptr [[A]], align 4
9556 // SIMD-ONLY01-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
9557 // SIMD-ONLY01-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
9558 // SIMD-ONLY01-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
9559 // SIMD-ONLY01-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
9560 // SIMD-ONLY01-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
9561 // SIMD-ONLY01-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
9562 // SIMD-ONLY01-NEXT:    store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
9563 // SIMD-ONLY01-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
9564 // SIMD-ONLY01-NEXT:    ret i32 [[TMP2]]
9565 //
9566 //
9567 // SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3fooiPd
9568 // SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
9569 // SIMD-ONLY02-NEXT:  entry:
9570 // SIMD-ONLY02-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9571 // SIMD-ONLY02-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
9572 // SIMD-ONLY02-NEXT:    [[A:%.*]] = alloca i32, align 4
9573 // SIMD-ONLY02-NEXT:    [[AA:%.*]] = alloca i16, align 2
9574 // SIMD-ONLY02-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
9575 // SIMD-ONLY02-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
9576 // SIMD-ONLY02-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
9577 // SIMD-ONLY02-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
9578 // SIMD-ONLY02-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
9579 // SIMD-ONLY02-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
9580 // SIMD-ONLY02-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
9581 // SIMD-ONLY02-NEXT:    [[P:%.*]] = alloca ptr, align 64
9582 // SIMD-ONLY02-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9583 // SIMD-ONLY02-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
9584 // SIMD-ONLY02-NEXT:    store i32 0, ptr [[A]], align 4
9585 // SIMD-ONLY02-NEXT:    store i16 0, ptr [[AA]], align 2
9586 // SIMD-ONLY02-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
9587 // SIMD-ONLY02-NEXT:    [[TMP1:%.*]] = call ptr @llvm.stacksave.p0()
9588 // SIMD-ONLY02-NEXT:    store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4
9589 // SIMD-ONLY02-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
9590 // SIMD-ONLY02-NEXT:    store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
9591 // SIMD-ONLY02-NEXT:    [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
9592 // SIMD-ONLY02-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
9593 // SIMD-ONLY02-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
9594 // SIMD-ONLY02-NEXT:    store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
9595 // SIMD-ONLY02-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
9596 // SIMD-ONLY02-NEXT:    [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
9597 // SIMD-ONLY02-NEXT:    store i32 [[TMP4]], ptr [[X]], align 4
9598 // SIMD-ONLY02-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
9599 // SIMD-ONLY02-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
9600 // SIMD-ONLY02-NEXT:    store i32 [[TMP5]], ptr [[Y]], align 4
9601 // SIMD-ONLY02-NEXT:    store ptr [[A]], ptr [[P]], align 64
9602 // SIMD-ONLY02-NEXT:    [[TMP6:%.*]] = load i16, ptr [[AA]], align 2
9603 // SIMD-ONLY02-NEXT:    [[CONV:%.*]] = sext i16 [[TMP6]] to i32
9604 // SIMD-ONLY02-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
9605 // SIMD-ONLY02-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
9606 // SIMD-ONLY02-NEXT:    store i16 [[CONV2]], ptr [[AA]], align 2
9607 // SIMD-ONLY02-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i32 0, i32 2
9608 // SIMD-ONLY02-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
9609 // SIMD-ONLY02-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i32 3
9610 // SIMD-ONLY02-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
9611 // SIMD-ONLY02-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i32 0, i32 1
9612 // SIMD-ONLY02-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i32 0, i32 2
9613 // SIMD-ONLY02-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
9614 // SIMD-ONLY02-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]]
9615 // SIMD-ONLY02-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i32 [[TMP7]]
9616 // SIMD-ONLY02-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i32 3
9617 // SIMD-ONLY02-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
9618 // SIMD-ONLY02-NEXT:    [[X8:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
9619 // SIMD-ONLY02-NEXT:    store i64 1, ptr [[X8]], align 4
9620 // SIMD-ONLY02-NEXT:    [[Y9:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
9621 // SIMD-ONLY02-NEXT:    store i8 1, ptr [[Y9]], align 4
9622 // SIMD-ONLY02-NEXT:    [[X10:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
9623 // SIMD-ONLY02-NEXT:    [[TMP8:%.*]] = load i32, ptr [[X10]], align 4
9624 // SIMD-ONLY02-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double
9625 // SIMD-ONLY02-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
9626 // SIMD-ONLY02-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i32 0
9627 // SIMD-ONLY02-NEXT:    store double [[CONV11]], ptr [[ARRAYIDX12]], align 4
9628 // SIMD-ONLY02-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
9629 // SIMD-ONLY02-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i32 0
9630 // SIMD-ONLY02-NEXT:    [[TMP11:%.*]] = load double, ptr [[ARRAYIDX13]], align 4
9631 // SIMD-ONLY02-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
9632 // SIMD-ONLY02-NEXT:    store double [[INC]], ptr [[ARRAYIDX13]], align 4
9633 // SIMD-ONLY02-NEXT:    [[TMP12:%.*]] = load i32, ptr [[A]], align 4
9634 // SIMD-ONLY02-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
9635 // SIMD-ONLY02-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP13]])
9636 // SIMD-ONLY02-NEXT:    ret i32 [[TMP12]]
9637 //
9638 //
9639 // SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3bariPd
9640 // SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
9641 // SIMD-ONLY02-NEXT:  entry:
9642 // SIMD-ONLY02-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9643 // SIMD-ONLY02-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
9644 // SIMD-ONLY02-NEXT:    [[A:%.*]] = alloca i32, align 4
9645 // SIMD-ONLY02-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
9646 // SIMD-ONLY02-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9647 // SIMD-ONLY02-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
9648 // SIMD-ONLY02-NEXT:    store i32 0, ptr [[A]], align 4
9649 // SIMD-ONLY02-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
9650 // SIMD-ONLY02-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
9651 // SIMD-ONLY02-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], ptr noundef [[TMP1]])
9652 // SIMD-ONLY02-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
9653 // SIMD-ONLY02-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
9654 // SIMD-ONLY02-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
9655 // SIMD-ONLY02-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
9656 // SIMD-ONLY02-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
9657 // SIMD-ONLY02-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
9658 // SIMD-ONLY02-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
9659 // SIMD-ONLY02-NEXT:    store i32 [[ADD2]], ptr [[A]], align 4
9660 // SIMD-ONLY02-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
9661 // SIMD-ONLY02-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
9662 // SIMD-ONLY02-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
9663 // SIMD-ONLY02-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
9664 // SIMD-ONLY02-NEXT:    store i32 [[ADD4]], ptr [[A]], align 4
9665 // SIMD-ONLY02-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
9666 // SIMD-ONLY02-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
9667 // SIMD-ONLY02-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
9668 // SIMD-ONLY02-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
9669 // SIMD-ONLY02-NEXT:    store i32 [[ADD6]], ptr [[A]], align 4
9670 // SIMD-ONLY02-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
9671 // SIMD-ONLY02-NEXT:    ret i32 [[TMP9]]
9672 //
9673 //
9674 // SIMD-ONLY02-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
9675 // SIMD-ONLY02-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
9676 // SIMD-ONLY02-NEXT:  entry:
9677 // SIMD-ONLY02-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 4
9678 // SIMD-ONLY02-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9679 // SIMD-ONLY02-NEXT:    [[B:%.*]] = alloca i32, align 4
9680 // SIMD-ONLY02-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
9681 // SIMD-ONLY02-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
9682 // SIMD-ONLY02-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
9683 // SIMD-ONLY02-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9684 // SIMD-ONLY02-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
9685 // SIMD-ONLY02-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
9686 // SIMD-ONLY02-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
9687 // SIMD-ONLY02-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
9688 // SIMD-ONLY02-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
9689 // SIMD-ONLY02-NEXT:    [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
9690 // SIMD-ONLY02-NEXT:    store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4
9691 // SIMD-ONLY02-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
9692 // SIMD-ONLY02-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
9693 // SIMD-ONLY02-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
9694 // SIMD-ONLY02-NEXT:    [[TMP4:%.*]] = load i32, ptr [[B]], align 4
9695 // SIMD-ONLY02-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP4]] to double
9696 // SIMD-ONLY02-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
9697 // SIMD-ONLY02-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
9698 // SIMD-ONLY02-NEXT:    store double [[ADD2]], ptr [[A]], align 4
9699 // SIMD-ONLY02-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
9700 // SIMD-ONLY02-NEXT:    [[TMP5:%.*]] = load double, ptr [[A3]], align 4
9701 // SIMD-ONLY02-NEXT:    [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00
9702 // SIMD-ONLY02-NEXT:    store double [[INC]], ptr [[A3]], align 4
9703 // SIMD-ONLY02-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
9704 // SIMD-ONLY02-NEXT:    [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]]
9705 // SIMD-ONLY02-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP6]]
9706 // SIMD-ONLY02-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
9707 // SIMD-ONLY02-NEXT:    store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
9708 // SIMD-ONLY02-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]]
9709 // SIMD-ONLY02-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP7]]
9710 // SIMD-ONLY02-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i32 1
9711 // SIMD-ONLY02-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
9712 // SIMD-ONLY02-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
9713 // SIMD-ONLY02-NEXT:    [[TMP9:%.*]] = load i32, ptr [[B]], align 4
9714 // SIMD-ONLY02-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
9715 // SIMD-ONLY02-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
9716 // SIMD-ONLY02-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP10]])
9717 // SIMD-ONLY02-NEXT:    ret i32 [[ADD9]]
9718 //
9719 //
9720 // SIMD-ONLY02-LABEL: define {{[^@]+}}@_ZL7fstatici
9721 // SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
9722 // SIMD-ONLY02-NEXT:  entry:
9723 // SIMD-ONLY02-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9724 // SIMD-ONLY02-NEXT:    [[A:%.*]] = alloca i32, align 4
9725 // SIMD-ONLY02-NEXT:    [[AAA:%.*]] = alloca i8, align 1
9726 // SIMD-ONLY02-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
9727 // SIMD-ONLY02-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9728 // SIMD-ONLY02-NEXT:    store i32 0, ptr [[A]], align 4
9729 // SIMD-ONLY02-NEXT:    store i8 0, ptr [[AAA]], align 1
9730 // SIMD-ONLY02-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
9731 // SIMD-ONLY02-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
9732 // SIMD-ONLY02-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
9733 // SIMD-ONLY02-NEXT:    [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
9734 // SIMD-ONLY02-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
9735 // SIMD-ONLY02-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
9736 // SIMD-ONLY02-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
9737 // SIMD-ONLY02-NEXT:    store i8 [[CONV2]], ptr [[AAA]], align 1
9738 // SIMD-ONLY02-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
9739 // SIMD-ONLY02-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
9740 // SIMD-ONLY02-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
9741 // SIMD-ONLY02-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
9742 // SIMD-ONLY02-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
9743 // SIMD-ONLY02-NEXT:    ret i32 [[TMP3]]
9744 //
9745 //
9746 // SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
9747 // SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
9748 // SIMD-ONLY02-NEXT:  entry:
9749 // SIMD-ONLY02-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9750 // SIMD-ONLY02-NEXT:    [[A:%.*]] = alloca i32, align 4
9751 // SIMD-ONLY02-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
9752 // SIMD-ONLY02-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9753 // SIMD-ONLY02-NEXT:    store i32 0, ptr [[A]], align 4
9754 // SIMD-ONLY02-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
9755 // SIMD-ONLY02-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
9756 // SIMD-ONLY02-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
9757 // SIMD-ONLY02-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
9758 // SIMD-ONLY02-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
9759 // SIMD-ONLY02-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
9760 // SIMD-ONLY02-NEXT:    store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
9761 // SIMD-ONLY02-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
9762 // SIMD-ONLY02-NEXT:    ret i32 [[TMP2]]
9763 //
9764 //
9765 // SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3fooiPd
9766 // SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
9767 // SIMD-ONLY03-NEXT:  entry:
9768 // SIMD-ONLY03-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9769 // SIMD-ONLY03-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
9770 // SIMD-ONLY03-NEXT:    [[A:%.*]] = alloca i32, align 4
9771 // SIMD-ONLY03-NEXT:    [[AA:%.*]] = alloca i16, align 2
9772 // SIMD-ONLY03-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
9773 // SIMD-ONLY03-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
9774 // SIMD-ONLY03-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
9775 // SIMD-ONLY03-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
9776 // SIMD-ONLY03-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
9777 // SIMD-ONLY03-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
9778 // SIMD-ONLY03-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
9779 // SIMD-ONLY03-NEXT:    [[P:%.*]] = alloca ptr, align 64
9780 // SIMD-ONLY03-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9781 // SIMD-ONLY03-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
9782 // SIMD-ONLY03-NEXT:    store i32 0, ptr [[A]], align 4
9783 // SIMD-ONLY03-NEXT:    store i16 0, ptr [[AA]], align 2
9784 // SIMD-ONLY03-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
9785 // SIMD-ONLY03-NEXT:    [[TMP1:%.*]] = call ptr @llvm.stacksave.p0()
9786 // SIMD-ONLY03-NEXT:    store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4
9787 // SIMD-ONLY03-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
9788 // SIMD-ONLY03-NEXT:    store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
9789 // SIMD-ONLY03-NEXT:    [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
9790 // SIMD-ONLY03-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
9791 // SIMD-ONLY03-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
9792 // SIMD-ONLY03-NEXT:    store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
9793 // SIMD-ONLY03-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
9794 // SIMD-ONLY03-NEXT:    [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
9795 // SIMD-ONLY03-NEXT:    store i32 [[TMP4]], ptr [[X]], align 4
9796 // SIMD-ONLY03-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
9797 // SIMD-ONLY03-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
9798 // SIMD-ONLY03-NEXT:    store i32 [[TMP5]], ptr [[Y]], align 4
9799 // SIMD-ONLY03-NEXT:    store ptr [[A]], ptr [[P]], align 64
9800 // SIMD-ONLY03-NEXT:    [[TMP6:%.*]] = load i16, ptr [[AA]], align 2
9801 // SIMD-ONLY03-NEXT:    [[CONV:%.*]] = sext i16 [[TMP6]] to i32
9802 // SIMD-ONLY03-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
9803 // SIMD-ONLY03-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
9804 // SIMD-ONLY03-NEXT:    store i16 [[CONV2]], ptr [[AA]], align 2
9805 // SIMD-ONLY03-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i32 0, i32 2
9806 // SIMD-ONLY03-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
9807 // SIMD-ONLY03-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i32 3
9808 // SIMD-ONLY03-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
9809 // SIMD-ONLY03-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i32 0, i32 1
9810 // SIMD-ONLY03-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i32 0, i32 2
9811 // SIMD-ONLY03-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
9812 // SIMD-ONLY03-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]]
9813 // SIMD-ONLY03-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i32 [[TMP7]]
9814 // SIMD-ONLY03-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i32 3
9815 // SIMD-ONLY03-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
9816 // SIMD-ONLY03-NEXT:    [[X8:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
9817 // SIMD-ONLY03-NEXT:    store i64 1, ptr [[X8]], align 4
9818 // SIMD-ONLY03-NEXT:    [[Y9:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
9819 // SIMD-ONLY03-NEXT:    store i8 1, ptr [[Y9]], align 4
9820 // SIMD-ONLY03-NEXT:    [[X10:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
9821 // SIMD-ONLY03-NEXT:    [[TMP8:%.*]] = load i32, ptr [[X10]], align 4
9822 // SIMD-ONLY03-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double
9823 // SIMD-ONLY03-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
9824 // SIMD-ONLY03-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i32 0
9825 // SIMD-ONLY03-NEXT:    store double [[CONV11]], ptr [[ARRAYIDX12]], align 4
9826 // SIMD-ONLY03-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
9827 // SIMD-ONLY03-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i32 0
9828 // SIMD-ONLY03-NEXT:    [[TMP11:%.*]] = load double, ptr [[ARRAYIDX13]], align 4
9829 // SIMD-ONLY03-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
9830 // SIMD-ONLY03-NEXT:    store double [[INC]], ptr [[ARRAYIDX13]], align 4
9831 // SIMD-ONLY03-NEXT:    [[TMP12:%.*]] = load i32, ptr [[A]], align 4
9832 // SIMD-ONLY03-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
9833 // SIMD-ONLY03-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP13]])
9834 // SIMD-ONLY03-NEXT:    ret i32 [[TMP12]]
9835 //
9836 //
9837 // SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3bariPd
9838 // SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
9839 // SIMD-ONLY03-NEXT:  entry:
9840 // SIMD-ONLY03-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9841 // SIMD-ONLY03-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
9842 // SIMD-ONLY03-NEXT:    [[A:%.*]] = alloca i32, align 4
9843 // SIMD-ONLY03-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
9844 // SIMD-ONLY03-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9845 // SIMD-ONLY03-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
9846 // SIMD-ONLY03-NEXT:    store i32 0, ptr [[A]], align 4
9847 // SIMD-ONLY03-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
9848 // SIMD-ONLY03-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
9849 // SIMD-ONLY03-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], ptr noundef [[TMP1]])
9850 // SIMD-ONLY03-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
9851 // SIMD-ONLY03-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
9852 // SIMD-ONLY03-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
9853 // SIMD-ONLY03-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
9854 // SIMD-ONLY03-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
9855 // SIMD-ONLY03-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
9856 // SIMD-ONLY03-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
9857 // SIMD-ONLY03-NEXT:    store i32 [[ADD2]], ptr [[A]], align 4
9858 // SIMD-ONLY03-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
9859 // SIMD-ONLY03-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
9860 // SIMD-ONLY03-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
9861 // SIMD-ONLY03-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
9862 // SIMD-ONLY03-NEXT:    store i32 [[ADD4]], ptr [[A]], align 4
9863 // SIMD-ONLY03-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
9864 // SIMD-ONLY03-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
9865 // SIMD-ONLY03-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
9866 // SIMD-ONLY03-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
9867 // SIMD-ONLY03-NEXT:    store i32 [[ADD6]], ptr [[A]], align 4
9868 // SIMD-ONLY03-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
9869 // SIMD-ONLY03-NEXT:    ret i32 [[TMP9]]
9870 //
9871 //
9872 // SIMD-ONLY03-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
9873 // SIMD-ONLY03-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
9874 // SIMD-ONLY03-NEXT:  entry:
9875 // SIMD-ONLY03-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 4
9876 // SIMD-ONLY03-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9877 // SIMD-ONLY03-NEXT:    [[B:%.*]] = alloca i32, align 4
9878 // SIMD-ONLY03-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
9879 // SIMD-ONLY03-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
9880 // SIMD-ONLY03-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
9881 // SIMD-ONLY03-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9882 // SIMD-ONLY03-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
9883 // SIMD-ONLY03-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
9884 // SIMD-ONLY03-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
9885 // SIMD-ONLY03-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
9886 // SIMD-ONLY03-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
9887 // SIMD-ONLY03-NEXT:    [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
9888 // SIMD-ONLY03-NEXT:    store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4
9889 // SIMD-ONLY03-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
9890 // SIMD-ONLY03-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
9891 // SIMD-ONLY03-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
9892 // SIMD-ONLY03-NEXT:    [[TMP4:%.*]] = load i32, ptr [[B]], align 4
9893 // SIMD-ONLY03-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP4]] to double
9894 // SIMD-ONLY03-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
9895 // SIMD-ONLY03-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
9896 // SIMD-ONLY03-NEXT:    store double [[ADD2]], ptr [[A]], align 4
9897 // SIMD-ONLY03-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
9898 // SIMD-ONLY03-NEXT:    [[TMP5:%.*]] = load double, ptr [[A3]], align 4
9899 // SIMD-ONLY03-NEXT:    [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00
9900 // SIMD-ONLY03-NEXT:    store double [[INC]], ptr [[A3]], align 4
9901 // SIMD-ONLY03-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
9902 // SIMD-ONLY03-NEXT:    [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]]
9903 // SIMD-ONLY03-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP6]]
9904 // SIMD-ONLY03-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
9905 // SIMD-ONLY03-NEXT:    store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
9906 // SIMD-ONLY03-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]]
9907 // SIMD-ONLY03-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP7]]
9908 // SIMD-ONLY03-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i32 1
9909 // SIMD-ONLY03-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
9910 // SIMD-ONLY03-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
9911 // SIMD-ONLY03-NEXT:    [[TMP9:%.*]] = load i32, ptr [[B]], align 4
9912 // SIMD-ONLY03-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
9913 // SIMD-ONLY03-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
9914 // SIMD-ONLY03-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP10]])
9915 // SIMD-ONLY03-NEXT:    ret i32 [[ADD9]]
9916 //
9917 //
9918 // SIMD-ONLY03-LABEL: define {{[^@]+}}@_ZL7fstatici
9919 // SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
9920 // SIMD-ONLY03-NEXT:  entry:
9921 // SIMD-ONLY03-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9922 // SIMD-ONLY03-NEXT:    [[A:%.*]] = alloca i32, align 4
9923 // SIMD-ONLY03-NEXT:    [[AAA:%.*]] = alloca i8, align 1
9924 // SIMD-ONLY03-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
9925 // SIMD-ONLY03-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9926 // SIMD-ONLY03-NEXT:    store i32 0, ptr [[A]], align 4
9927 // SIMD-ONLY03-NEXT:    store i8 0, ptr [[AAA]], align 1
9928 // SIMD-ONLY03-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
9929 // SIMD-ONLY03-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
9930 // SIMD-ONLY03-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
9931 // SIMD-ONLY03-NEXT:    [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
9932 // SIMD-ONLY03-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
9933 // SIMD-ONLY03-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
9934 // SIMD-ONLY03-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
9935 // SIMD-ONLY03-NEXT:    store i8 [[CONV2]], ptr [[AAA]], align 1
9936 // SIMD-ONLY03-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
9937 // SIMD-ONLY03-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
9938 // SIMD-ONLY03-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
9939 // SIMD-ONLY03-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
9940 // SIMD-ONLY03-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
9941 // SIMD-ONLY03-NEXT:    ret i32 [[TMP3]]
9942 //
9943 //
9944 // SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
9945 // SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
9946 // SIMD-ONLY03-NEXT:  entry:
9947 // SIMD-ONLY03-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
9948 // SIMD-ONLY03-NEXT:    [[A:%.*]] = alloca i32, align 4
9949 // SIMD-ONLY03-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
9950 // SIMD-ONLY03-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
9951 // SIMD-ONLY03-NEXT:    store i32 0, ptr [[A]], align 4
9952 // SIMD-ONLY03-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
9953 // SIMD-ONLY03-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
9954 // SIMD-ONLY03-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
9955 // SIMD-ONLY03-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
9956 // SIMD-ONLY03-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
9957 // SIMD-ONLY03-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
9958 // SIMD-ONLY03-NEXT:    store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
9959 // SIMD-ONLY03-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
9960 // SIMD-ONLY03-NEXT:    ret i32 [[TMP2]]
9961 //
9962 //
9963 // TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
9964 // TCHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], ptr noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
9965 // TCHECK-NEXT:  entry:
9966 // TCHECK-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
9967 // TCHECK-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
9968 // TCHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
9969 // TCHECK-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
9970 // TCHECK-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
9971 // TCHECK-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
9972 // TCHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
9973 // TCHECK-NEXT:    store i64 [[GA]], ptr [[GA_ADDR]], align 8
9974 // TCHECK-NEXT:    ret void
9975 //
9976 //
9977 // TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
9978 // TCHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
9979 // TCHECK-NEXT:  entry:
9980 // TCHECK-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
9981 // TCHECK-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
9982 // TCHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
9983 // TCHECK-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
9984 // TCHECK-NEXT:    [[BN_ADDR:%.*]] = alloca ptr, align 8
9985 // TCHECK-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
9986 // TCHECK-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
9987 // TCHECK-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
9988 // TCHECK-NEXT:    [[CN_ADDR:%.*]] = alloca ptr, align 8
9989 // TCHECK-NEXT:    [[D_ADDR:%.*]] = alloca ptr, align 8
9990 // TCHECK-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
9991 // TCHECK-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
9992 // TCHECK-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
9993 // TCHECK-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
9994 // TCHECK-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
9995 // TCHECK-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
9996 // TCHECK-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
9997 // TCHECK-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
9998 // TCHECK-NEXT:    store i64 [[AA]], ptr [[AA_ADDR]], align 8
9999 // TCHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
10000 // TCHECK-NEXT:    store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
10001 // TCHECK-NEXT:    store ptr [[BN]], ptr [[BN_ADDR]], align 8
10002 // TCHECK-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
10003 // TCHECK-NEXT:    store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
10004 // TCHECK-NEXT:    store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8
10005 // TCHECK-NEXT:    store ptr [[CN]], ptr [[CN_ADDR]], align 8
10006 // TCHECK-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 8
10007 // TCHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
10008 // TCHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
10009 // TCHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8
10010 // TCHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
10011 // TCHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
10012 // TCHECK-NEXT:    [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8
10013 // TCHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8
10014 // TCHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
10015 // TCHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i64 40, i1 false)
10016 // TCHECK-NEXT:    [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
10017 // TCHECK-NEXT:    store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8
10018 // TCHECK-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
10019 // TCHECK-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
10020 // TCHECK-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP1]], 4
10021 // TCHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i64 [[TMP9]], i1 false)
10022 // TCHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i64 400, i1 false)
10023 // TCHECK-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
10024 // TCHECK-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP10]], align 8
10025 // TCHECK-NEXT:    store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
10026 // TCHECK-NEXT:    store i64 [[TMP5]], ptr [[__VLA_EXPR2]], align 8
10027 // TCHECK-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
10028 // TCHECK-NEXT:    [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
10029 // TCHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i64 [[TMP12]], i1 false)
10030 // TCHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[D9]], ptr align 8 [[TMP7]], i64 16, i1 false)
10031 // TCHECK-NEXT:    [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
10032 // TCHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP13]] to i32
10033 // TCHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
10034 // TCHECK-NEXT:    [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
10035 // TCHECK-NEXT:    store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
10036 // TCHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i64 0, i64 2
10037 // TCHECK-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
10038 // TCHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i64 3
10039 // TCHECK-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
10040 // TCHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i64 0, i64 1
10041 // TCHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i64 0, i64 2
10042 // TCHECK-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
10043 // TCHECK-NEXT:    [[TMP14:%.*]] = mul nsw i64 1, [[TMP5]]
10044 // TCHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i64 [[TMP14]]
10045 // TCHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3
10046 // TCHECK-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
10047 // TCHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
10048 // TCHECK-NEXT:    store i64 1, ptr [[X]], align 8
10049 // TCHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
10050 // TCHECK-NEXT:    store i8 1, ptr [[Y]], align 8
10051 // TCHECK-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
10052 // TCHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP15]])
10053 // TCHECK-NEXT:    ret void
10054 //
10055 //
10056 // TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
10057 // TCHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
10058 // TCHECK-NEXT:  entry:
10059 // TCHECK-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
10060 // TCHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
10061 // TCHECK-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
10062 // TCHECK-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
10063 // TCHECK-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
10064 // TCHECK-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
10065 // TCHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
10066 // TCHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0:%.*]], ptr [[TMP0]], i32 0, i32 0
10067 // TCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
10068 // TCHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
10069 // TCHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
10070 // TCHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 0
10071 // TCHECK-NEXT:    store double [[CONV]], ptr [[ARRAYIDX]], align 8
10072 // TCHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
10073 // TCHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0
10074 // TCHECK-NEXT:    [[TMP4:%.*]] = load double, ptr [[ARRAYIDX1]], align 8
10075 // TCHECK-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
10076 // TCHECK-NEXT:    store double [[INC]], ptr [[ARRAYIDX1]], align 8
10077 // TCHECK-NEXT:    ret void
10078 //
10079 //
10080 // TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
10081 // TCHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
10082 // TCHECK-NEXT:  entry:
10083 // TCHECK-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
10084 // TCHECK-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
10085 // TCHECK-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
10086 // TCHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
10087 // TCHECK-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
10088 // TCHECK-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
10089 // TCHECK-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
10090 // TCHECK-NEXT:    store i64 [[AAA]], ptr [[AAA_ADDR]], align 8
10091 // TCHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
10092 // TCHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
10093 // TCHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
10094 // TCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
10095 // TCHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
10096 // TCHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
10097 // TCHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
10098 // TCHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP2]] to i32
10099 // TCHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
10100 // TCHECK-NEXT:    [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
10101 // TCHECK-NEXT:    store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
10102 // TCHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
10103 // TCHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
10104 // TCHECK-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
10105 // TCHECK-NEXT:    store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
10106 // TCHECK-NEXT:    ret void
10107 //
10108 //
10109 // TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
10110 // TCHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
10111 // TCHECK-NEXT:  entry:
10112 // TCHECK-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
10113 // TCHECK-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
10114 // TCHECK-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
10115 // TCHECK-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
10116 // TCHECK-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
10117 // TCHECK-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
10118 // TCHECK-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
10119 // TCHECK-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
10120 // TCHECK-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
10121 // TCHECK-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
10122 // TCHECK-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
10123 // TCHECK-NEXT:    store i64 [[B]], ptr [[B_ADDR]], align 8
10124 // TCHECK-NEXT:    store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
10125 // TCHECK-NEXT:    store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
10126 // TCHECK-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
10127 // TCHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
10128 // TCHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
10129 // TCHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
10130 // TCHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
10131 // TCHECK-NEXT:    [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
10132 // TCHECK-NEXT:    store ptr [[TMP4]], ptr [[SAVED_STACK]], align 8
10133 // TCHECK-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
10134 // TCHECK-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
10135 // TCHECK-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
10136 // TCHECK-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR1]], align 8
10137 // TCHECK-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
10138 // TCHECK-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
10139 // TCHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i64 [[TMP7]], i1 false)
10140 // TCHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
10141 // TCHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
10142 // TCHECK-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
10143 // TCHECK-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
10144 // TCHECK-NEXT:    store double [[ADD]], ptr [[A]], align 8
10145 // TCHECK-NEXT:    [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
10146 // TCHECK-NEXT:    [[TMP9:%.*]] = load double, ptr [[A4]], align 8
10147 // TCHECK-NEXT:    [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
10148 // TCHECK-NEXT:    store double [[INC]], ptr [[A4]], align 8
10149 // TCHECK-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
10150 // TCHECK-NEXT:    [[TMP10:%.*]] = mul nsw i64 1, [[TMP2]]
10151 // TCHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i64 [[TMP10]]
10152 // TCHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
10153 // TCHECK-NEXT:    store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
10154 // TCHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
10155 // TCHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP11]])
10156 // TCHECK-NEXT:    ret void
10157 //
10158 //
10159 // TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
10160 // TCHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
10161 // TCHECK-NEXT:  entry:
10162 // TCHECK-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
10163 // TCHECK-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
10164 // TCHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
10165 // TCHECK-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
10166 // TCHECK-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
10167 // TCHECK-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
10168 // TCHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
10169 // TCHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
10170 // TCHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
10171 // TCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
10172 // TCHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
10173 // TCHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
10174 // TCHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
10175 // TCHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
10176 // TCHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
10177 // TCHECK-NEXT:    store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
10178 // TCHECK-NEXT:    ret void
10179 //
10180 //
10181 // TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
10182 // TCHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], ptr noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
10183 // TCHECK1-NEXT:  entry:
10184 // TCHECK1-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
10185 // TCHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
10186 // TCHECK1-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
10187 // TCHECK1-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
10188 // TCHECK1-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
10189 // TCHECK1-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
10190 // TCHECK1-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
10191 // TCHECK1-NEXT:    store i64 [[GA]], ptr [[GA_ADDR]], align 8
10192 // TCHECK1-NEXT:    ret void
10193 //
10194 //
10195 // TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
10196 // TCHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
10197 // TCHECK1-NEXT:  entry:
10198 // TCHECK1-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
10199 // TCHECK1-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
10200 // TCHECK1-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
10201 // TCHECK1-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
10202 // TCHECK1-NEXT:    [[BN_ADDR:%.*]] = alloca ptr, align 8
10203 // TCHECK1-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
10204 // TCHECK1-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
10205 // TCHECK1-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
10206 // TCHECK1-NEXT:    [[CN_ADDR:%.*]] = alloca ptr, align 8
10207 // TCHECK1-NEXT:    [[D_ADDR:%.*]] = alloca ptr, align 8
10208 // TCHECK1-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
10209 // TCHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
10210 // TCHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
10211 // TCHECK1-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
10212 // TCHECK1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
10213 // TCHECK1-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
10214 // TCHECK1-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
10215 // TCHECK1-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
10216 // TCHECK1-NEXT:    store i64 [[AA]], ptr [[AA_ADDR]], align 8
10217 // TCHECK1-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
10218 // TCHECK1-NEXT:    store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
10219 // TCHECK1-NEXT:    store ptr [[BN]], ptr [[BN_ADDR]], align 8
10220 // TCHECK1-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
10221 // TCHECK1-NEXT:    store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
10222 // TCHECK1-NEXT:    store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8
10223 // TCHECK1-NEXT:    store ptr [[CN]], ptr [[CN_ADDR]], align 8
10224 // TCHECK1-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 8
10225 // TCHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
10226 // TCHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
10227 // TCHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8
10228 // TCHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
10229 // TCHECK1-NEXT:    [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
10230 // TCHECK1-NEXT:    [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8
10231 // TCHECK1-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8
10232 // TCHECK1-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
10233 // TCHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i64 40, i1 false)
10234 // TCHECK1-NEXT:    [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
10235 // TCHECK1-NEXT:    store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8
10236 // TCHECK1-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
10237 // TCHECK1-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
10238 // TCHECK1-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP1]], 4
10239 // TCHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i64 [[TMP9]], i1 false)
10240 // TCHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i64 400, i1 false)
10241 // TCHECK1-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
10242 // TCHECK1-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP10]], align 8
10243 // TCHECK1-NEXT:    store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
10244 // TCHECK1-NEXT:    store i64 [[TMP5]], ptr [[__VLA_EXPR2]], align 8
10245 // TCHECK1-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
10246 // TCHECK1-NEXT:    [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
10247 // TCHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i64 [[TMP12]], i1 false)
10248 // TCHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[D9]], ptr align 8 [[TMP7]], i64 16, i1 false)
10249 // TCHECK1-NEXT:    [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
10250 // TCHECK1-NEXT:    [[CONV:%.*]] = sext i16 [[TMP13]] to i32
10251 // TCHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
10252 // TCHECK1-NEXT:    [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
10253 // TCHECK1-NEXT:    store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
10254 // TCHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i64 0, i64 2
10255 // TCHECK1-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
10256 // TCHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i64 3
10257 // TCHECK1-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
10258 // TCHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i64 0, i64 1
10259 // TCHECK1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i64 0, i64 2
10260 // TCHECK1-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
10261 // TCHECK1-NEXT:    [[TMP14:%.*]] = mul nsw i64 1, [[TMP5]]
10262 // TCHECK1-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i64 [[TMP14]]
10263 // TCHECK1-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3
10264 // TCHECK1-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
10265 // TCHECK1-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
10266 // TCHECK1-NEXT:    store i64 1, ptr [[X]], align 8
10267 // TCHECK1-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
10268 // TCHECK1-NEXT:    store i8 1, ptr [[Y]], align 8
10269 // TCHECK1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
10270 // TCHECK1-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP15]])
10271 // TCHECK1-NEXT:    ret void
10272 //
10273 //
10274 // TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
10275 // TCHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
10276 // TCHECK1-NEXT:  entry:
10277 // TCHECK1-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
10278 // TCHECK1-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
10279 // TCHECK1-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
10280 // TCHECK1-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
10281 // TCHECK1-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
10282 // TCHECK1-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
10283 // TCHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
10284 // TCHECK1-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0:%.*]], ptr [[TMP0]], i32 0, i32 0
10285 // TCHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
10286 // TCHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
10287 // TCHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
10288 // TCHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 0
10289 // TCHECK1-NEXT:    store double [[CONV]], ptr [[ARRAYIDX]], align 8
10290 // TCHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
10291 // TCHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0
10292 // TCHECK1-NEXT:    [[TMP4:%.*]] = load double, ptr [[ARRAYIDX1]], align 8
10293 // TCHECK1-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
10294 // TCHECK1-NEXT:    store double [[INC]], ptr [[ARRAYIDX1]], align 8
10295 // TCHECK1-NEXT:    ret void
10296 //
10297 //
10298 // TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
10299 // TCHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
10300 // TCHECK1-NEXT:  entry:
10301 // TCHECK1-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
10302 // TCHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
10303 // TCHECK1-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
10304 // TCHECK1-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
10305 // TCHECK1-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
10306 // TCHECK1-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
10307 // TCHECK1-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
10308 // TCHECK1-NEXT:    store i64 [[AAA]], ptr [[AAA_ADDR]], align 8
10309 // TCHECK1-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
10310 // TCHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
10311 // TCHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
10312 // TCHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
10313 // TCHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
10314 // TCHECK1-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
10315 // TCHECK1-NEXT:    [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
10316 // TCHECK1-NEXT:    [[CONV:%.*]] = sext i8 [[TMP2]] to i32
10317 // TCHECK1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
10318 // TCHECK1-NEXT:    [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
10319 // TCHECK1-NEXT:    store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
10320 // TCHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
10321 // TCHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
10322 // TCHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
10323 // TCHECK1-NEXT:    store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
10324 // TCHECK1-NEXT:    ret void
10325 //
10326 //
10327 // TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
10328 // TCHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
10329 // TCHECK1-NEXT:  entry:
10330 // TCHECK1-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
10331 // TCHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
10332 // TCHECK1-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
10333 // TCHECK1-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
10334 // TCHECK1-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
10335 // TCHECK1-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
10336 // TCHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
10337 // TCHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
10338 // TCHECK1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
10339 // TCHECK1-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
10340 // TCHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
10341 // TCHECK1-NEXT:    store i64 [[B]], ptr [[B_ADDR]], align 8
10342 // TCHECK1-NEXT:    store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
10343 // TCHECK1-NEXT:    store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
10344 // TCHECK1-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
10345 // TCHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
10346 // TCHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
10347 // TCHECK1-NEXT:    [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
10348 // TCHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
10349 // TCHECK1-NEXT:    [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
10350 // TCHECK1-NEXT:    store ptr [[TMP4]], ptr [[SAVED_STACK]], align 8
10351 // TCHECK1-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
10352 // TCHECK1-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
10353 // TCHECK1-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
10354 // TCHECK1-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR1]], align 8
10355 // TCHECK1-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
10356 // TCHECK1-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
10357 // TCHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i64 [[TMP7]], i1 false)
10358 // TCHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
10359 // TCHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
10360 // TCHECK1-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
10361 // TCHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
10362 // TCHECK1-NEXT:    store double [[ADD]], ptr [[A]], align 8
10363 // TCHECK1-NEXT:    [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
10364 // TCHECK1-NEXT:    [[TMP9:%.*]] = load double, ptr [[A4]], align 8
10365 // TCHECK1-NEXT:    [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
10366 // TCHECK1-NEXT:    store double [[INC]], ptr [[A4]], align 8
10367 // TCHECK1-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
10368 // TCHECK1-NEXT:    [[TMP10:%.*]] = mul nsw i64 1, [[TMP2]]
10369 // TCHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i64 [[TMP10]]
10370 // TCHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
10371 // TCHECK1-NEXT:    store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
10372 // TCHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
10373 // TCHECK1-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP11]])
10374 // TCHECK1-NEXT:    ret void
10375 //
10376 //
10377 // TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
10378 // TCHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
10379 // TCHECK1-NEXT:  entry:
10380 // TCHECK1-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
10381 // TCHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
10382 // TCHECK1-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
10383 // TCHECK1-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
10384 // TCHECK1-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
10385 // TCHECK1-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
10386 // TCHECK1-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
10387 // TCHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
10388 // TCHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i64 40, i1 false)
10389 // TCHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
10390 // TCHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
10391 // TCHECK1-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
10392 // TCHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i64 0, i64 2
10393 // TCHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
10394 // TCHECK1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
10395 // TCHECK1-NEXT:    store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
10396 // TCHECK1-NEXT:    ret void
10397 //
10398 //
10399 // TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
10400 // TCHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], ptr noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
10401 // TCHECK2-NEXT:  entry:
10402 // TCHECK2-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
10403 // TCHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
10404 // TCHECK2-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 4
10405 // TCHECK2-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
10406 // TCHECK2-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
10407 // TCHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
10408 // TCHECK2-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 4
10409 // TCHECK2-NEXT:    store i32 [[GA]], ptr [[GA_ADDR]], align 4
10410 // TCHECK2-NEXT:    ret void
10411 //
10412 //
10413 // TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
10414 // TCHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
10415 // TCHECK2-NEXT:  entry:
10416 // TCHECK2-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
10417 // TCHECK2-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
10418 // TCHECK2-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
10419 // TCHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
10420 // TCHECK2-NEXT:    [[BN_ADDR:%.*]] = alloca ptr, align 4
10421 // TCHECK2-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 4
10422 // TCHECK2-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
10423 // TCHECK2-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
10424 // TCHECK2-NEXT:    [[CN_ADDR:%.*]] = alloca ptr, align 4
10425 // TCHECK2-NEXT:    [[D_ADDR:%.*]] = alloca ptr, align 4
10426 // TCHECK2-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
10427 // TCHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
10428 // TCHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
10429 // TCHECK2-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
10430 // TCHECK2-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
10431 // TCHECK2-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
10432 // TCHECK2-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
10433 // TCHECK2-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
10434 // TCHECK2-NEXT:    store i32 [[AA]], ptr [[AA_ADDR]], align 4
10435 // TCHECK2-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
10436 // TCHECK2-NEXT:    store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
10437 // TCHECK2-NEXT:    store ptr [[BN]], ptr [[BN_ADDR]], align 4
10438 // TCHECK2-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
10439 // TCHECK2-NEXT:    store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
10440 // TCHECK2-NEXT:    store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4
10441 // TCHECK2-NEXT:    store ptr [[CN]], ptr [[CN_ADDR]], align 4
10442 // TCHECK2-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 4
10443 // TCHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
10444 // TCHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
10445 // TCHECK2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4
10446 // TCHECK2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
10447 // TCHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
10448 // TCHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4
10449 // TCHECK2-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4
10450 // TCHECK2-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4
10451 // TCHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i32 40, i1 false)
10452 // TCHECK2-NEXT:    [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
10453 // TCHECK2-NEXT:    store ptr [[TMP8]], ptr [[SAVED_STACK]], align 4
10454 // TCHECK2-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
10455 // TCHECK2-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
10456 // TCHECK2-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP1]], 4
10457 // TCHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i32 [[TMP9]], i1 false)
10458 // TCHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i32 400, i1 false)
10459 // TCHECK2-NEXT:    [[TMP10:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
10460 // TCHECK2-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP10]], align 8
10461 // TCHECK2-NEXT:    store i32 [[TMP4]], ptr [[__VLA_EXPR1]], align 4
10462 // TCHECK2-NEXT:    store i32 [[TMP5]], ptr [[__VLA_EXPR2]], align 4
10463 // TCHECK2-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
10464 // TCHECK2-NEXT:    [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 8
10465 // TCHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i32 [[TMP12]], i1 false)
10466 // TCHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[D9]], ptr align 4 [[TMP7]], i32 12, i1 false)
10467 // TCHECK2-NEXT:    [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
10468 // TCHECK2-NEXT:    [[CONV:%.*]] = sext i16 [[TMP13]] to i32
10469 // TCHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
10470 // TCHECK2-NEXT:    [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
10471 // TCHECK2-NEXT:    store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
10472 // TCHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i32 0, i32 2
10473 // TCHECK2-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
10474 // TCHECK2-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i32 3
10475 // TCHECK2-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
10476 // TCHECK2-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i32 0, i32 1
10477 // TCHECK2-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i32 0, i32 2
10478 // TCHECK2-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
10479 // TCHECK2-NEXT:    [[TMP14:%.*]] = mul nsw i32 1, [[TMP5]]
10480 // TCHECK2-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i32 [[TMP14]]
10481 // TCHECK2-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3
10482 // TCHECK2-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
10483 // TCHECK2-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
10484 // TCHECK2-NEXT:    store i64 1, ptr [[X]], align 4
10485 // TCHECK2-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
10486 // TCHECK2-NEXT:    store i8 1, ptr [[Y]], align 4
10487 // TCHECK2-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
10488 // TCHECK2-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP15]])
10489 // TCHECK2-NEXT:    ret void
10490 //
10491 //
10492 // TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
10493 // TCHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
10494 // TCHECK2-NEXT:  entry:
10495 // TCHECK2-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
10496 // TCHECK2-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
10497 // TCHECK2-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 4
10498 // TCHECK2-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
10499 // TCHECK2-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
10500 // TCHECK2-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 4
10501 // TCHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
10502 // TCHECK2-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0:%.*]], ptr [[TMP0]], i32 0, i32 0
10503 // TCHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
10504 // TCHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
10505 // TCHECK2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
10506 // TCHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0
10507 // TCHECK2-NEXT:    store double [[CONV]], ptr [[ARRAYIDX]], align 4
10508 // TCHECK2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
10509 // TCHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0
10510 // TCHECK2-NEXT:    [[TMP4:%.*]] = load double, ptr [[ARRAYIDX1]], align 4
10511 // TCHECK2-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
10512 // TCHECK2-NEXT:    store double [[INC]], ptr [[ARRAYIDX1]], align 4
10513 // TCHECK2-NEXT:    ret void
10514 //
10515 //
10516 // TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
10517 // TCHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
10518 // TCHECK2-NEXT:  entry:
10519 // TCHECK2-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
10520 // TCHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
10521 // TCHECK2-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
10522 // TCHECK2-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
10523 // TCHECK2-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
10524 // TCHECK2-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
10525 // TCHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
10526 // TCHECK2-NEXT:    store i32 [[AAA]], ptr [[AAA_ADDR]], align 4
10527 // TCHECK2-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
10528 // TCHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
10529 // TCHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
10530 // TCHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
10531 // TCHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
10532 // TCHECK2-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
10533 // TCHECK2-NEXT:    [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
10534 // TCHECK2-NEXT:    [[CONV:%.*]] = sext i8 [[TMP2]] to i32
10535 // TCHECK2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
10536 // TCHECK2-NEXT:    [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
10537 // TCHECK2-NEXT:    store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
10538 // TCHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
10539 // TCHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
10540 // TCHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
10541 // TCHECK2-NEXT:    store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
10542 // TCHECK2-NEXT:    ret void
10543 //
10544 //
10545 // TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
10546 // TCHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
10547 // TCHECK2-NEXT:  entry:
10548 // TCHECK2-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
10549 // TCHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 4
10550 // TCHECK2-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
10551 // TCHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
10552 // TCHECK2-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
10553 // TCHECK2-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 4
10554 // TCHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
10555 // TCHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
10556 // TCHECK2-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
10557 // TCHECK2-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
10558 // TCHECK2-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
10559 // TCHECK2-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
10560 // TCHECK2-NEXT:    store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
10561 // TCHECK2-NEXT:    store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
10562 // TCHECK2-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
10563 // TCHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
10564 // TCHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
10565 // TCHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
10566 // TCHECK2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
10567 // TCHECK2-NEXT:    [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
10568 // TCHECK2-NEXT:    store ptr [[TMP4]], ptr [[SAVED_STACK]], align 4
10569 // TCHECK2-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
10570 // TCHECK2-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
10571 // TCHECK2-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
10572 // TCHECK2-NEXT:    store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
10573 // TCHECK2-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
10574 // TCHECK2-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
10575 // TCHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i32 [[TMP7]], i1 false)
10576 // TCHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
10577 // TCHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
10578 // TCHECK2-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
10579 // TCHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
10580 // TCHECK2-NEXT:    store double [[ADD]], ptr [[A]], align 4
10581 // TCHECK2-NEXT:    [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
10582 // TCHECK2-NEXT:    [[TMP9:%.*]] = load double, ptr [[A4]], align 4
10583 // TCHECK2-NEXT:    [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
10584 // TCHECK2-NEXT:    store double [[INC]], ptr [[A4]], align 4
10585 // TCHECK2-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
10586 // TCHECK2-NEXT:    [[TMP10:%.*]] = mul nsw i32 1, [[TMP2]]
10587 // TCHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i32 [[TMP10]]
10588 // TCHECK2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
10589 // TCHECK2-NEXT:    store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
10590 // TCHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
10591 // TCHECK2-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP11]])
10592 // TCHECK2-NEXT:    ret void
10593 //
10594 //
10595 // TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
10596 // TCHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
10597 // TCHECK2-NEXT:  entry:
10598 // TCHECK2-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
10599 // TCHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
10600 // TCHECK2-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
10601 // TCHECK2-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
10602 // TCHECK2-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
10603 // TCHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
10604 // TCHECK2-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
10605 // TCHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
10606 // TCHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
10607 // TCHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
10608 // TCHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
10609 // TCHECK2-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
10610 // TCHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
10611 // TCHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
10612 // TCHECK2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
10613 // TCHECK2-NEXT:    store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
10614 // TCHECK2-NEXT:    ret void
10615 //
10616 //
10617 // TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
10618 // TCHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], ptr noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
10619 // TCHECK3-NEXT:  entry:
10620 // TCHECK3-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
10621 // TCHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
10622 // TCHECK3-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 4
10623 // TCHECK3-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
10624 // TCHECK3-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
10625 // TCHECK3-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
10626 // TCHECK3-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 4
10627 // TCHECK3-NEXT:    store i32 [[GA]], ptr [[GA_ADDR]], align 4
10628 // TCHECK3-NEXT:    ret void
10629 //
10630 //
10631 // TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
10632 // TCHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
10633 // TCHECK3-NEXT:  entry:
10634 // TCHECK3-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
10635 // TCHECK3-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
10636 // TCHECK3-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
10637 // TCHECK3-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
10638 // TCHECK3-NEXT:    [[BN_ADDR:%.*]] = alloca ptr, align 4
10639 // TCHECK3-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 4
10640 // TCHECK3-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
10641 // TCHECK3-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
10642 // TCHECK3-NEXT:    [[CN_ADDR:%.*]] = alloca ptr, align 4
10643 // TCHECK3-NEXT:    [[D_ADDR:%.*]] = alloca ptr, align 4
10644 // TCHECK3-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
10645 // TCHECK3-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
10646 // TCHECK3-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
10647 // TCHECK3-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
10648 // TCHECK3-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
10649 // TCHECK3-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
10650 // TCHECK3-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
10651 // TCHECK3-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
10652 // TCHECK3-NEXT:    store i32 [[AA]], ptr [[AA_ADDR]], align 4
10653 // TCHECK3-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
10654 // TCHECK3-NEXT:    store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
10655 // TCHECK3-NEXT:    store ptr [[BN]], ptr [[BN_ADDR]], align 4
10656 // TCHECK3-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
10657 // TCHECK3-NEXT:    store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
10658 // TCHECK3-NEXT:    store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4
10659 // TCHECK3-NEXT:    store ptr [[CN]], ptr [[CN_ADDR]], align 4
10660 // TCHECK3-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 4
10661 // TCHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
10662 // TCHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
10663 // TCHECK3-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4
10664 // TCHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
10665 // TCHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
10666 // TCHECK3-NEXT:    [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4
10667 // TCHECK3-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4
10668 // TCHECK3-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4
10669 // TCHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B5]], ptr align 4 [[TMP0]], i32 40, i1 false)
10670 // TCHECK3-NEXT:    [[TMP8:%.*]] = call ptr @llvm.stacksave.p0()
10671 // TCHECK3-NEXT:    store ptr [[TMP8]], ptr [[SAVED_STACK]], align 4
10672 // TCHECK3-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
10673 // TCHECK3-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
10674 // TCHECK3-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP1]], 4
10675 // TCHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VLA6]], ptr align 4 [[TMP2]], i32 [[TMP9]], i1 false)
10676 // TCHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[C7]], ptr align 8 [[TMP3]], i32 400, i1 false)
10677 // TCHECK3-NEXT:    [[TMP10:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
10678 // TCHECK3-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP10]], align 8
10679 // TCHECK3-NEXT:    store i32 [[TMP4]], ptr [[__VLA_EXPR1]], align 4
10680 // TCHECK3-NEXT:    store i32 [[TMP5]], ptr [[__VLA_EXPR2]], align 4
10681 // TCHECK3-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
10682 // TCHECK3-NEXT:    [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 8
10683 // TCHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[VLA8]], ptr align 8 [[TMP6]], i32 [[TMP12]], i1 false)
10684 // TCHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[D9]], ptr align 4 [[TMP7]], i32 12, i1 false)
10685 // TCHECK3-NEXT:    [[TMP13:%.*]] = load i16, ptr [[AA_ADDR]], align 2
10686 // TCHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP13]] to i32
10687 // TCHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
10688 // TCHECK3-NEXT:    [[CONV10:%.*]] = trunc i32 [[ADD]] to i16
10689 // TCHECK3-NEXT:    store i16 [[CONV10]], ptr [[AA_ADDR]], align 2
10690 // TCHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B5]], i32 0, i32 2
10691 // TCHECK3-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
10692 // TCHECK3-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[VLA6]], i32 3
10693 // TCHECK3-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX11]], align 4
10694 // TCHECK3-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C7]], i32 0, i32 1
10695 // TCHECK3-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i32 0, i32 2
10696 // TCHECK3-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX13]], align 8
10697 // TCHECK3-NEXT:    [[TMP14:%.*]] = mul nsw i32 1, [[TMP5]]
10698 // TCHECK3-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[VLA8]], i32 [[TMP14]]
10699 // TCHECK3-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3
10700 // TCHECK3-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX15]], align 8
10701 // TCHECK3-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 0
10702 // TCHECK3-NEXT:    store i64 1, ptr [[X]], align 4
10703 // TCHECK3-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D9]], i32 0, i32 1
10704 // TCHECK3-NEXT:    store i8 1, ptr [[Y]], align 4
10705 // TCHECK3-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
10706 // TCHECK3-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP15]])
10707 // TCHECK3-NEXT:    ret void
10708 //
10709 //
10710 // TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
10711 // TCHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
10712 // TCHECK3-NEXT:  entry:
10713 // TCHECK3-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
10714 // TCHECK3-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
10715 // TCHECK3-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 4
10716 // TCHECK3-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
10717 // TCHECK3-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
10718 // TCHECK3-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 4
10719 // TCHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
10720 // TCHECK3-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0:%.*]], ptr [[TMP0]], i32 0, i32 0
10721 // TCHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
10722 // TCHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
10723 // TCHECK3-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
10724 // TCHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0
10725 // TCHECK3-NEXT:    store double [[CONV]], ptr [[ARRAYIDX]], align 4
10726 // TCHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
10727 // TCHECK3-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0
10728 // TCHECK3-NEXT:    [[TMP4:%.*]] = load double, ptr [[ARRAYIDX1]], align 4
10729 // TCHECK3-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
10730 // TCHECK3-NEXT:    store double [[INC]], ptr [[ARRAYIDX1]], align 4
10731 // TCHECK3-NEXT:    ret void
10732 //
10733 //
10734 // TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
10735 // TCHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
10736 // TCHECK3-NEXT:  entry:
10737 // TCHECK3-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
10738 // TCHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
10739 // TCHECK3-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
10740 // TCHECK3-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
10741 // TCHECK3-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
10742 // TCHECK3-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
10743 // TCHECK3-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
10744 // TCHECK3-NEXT:    store i32 [[AAA]], ptr [[AAA_ADDR]], align 4
10745 // TCHECK3-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
10746 // TCHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
10747 // TCHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
10748 // TCHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
10749 // TCHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
10750 // TCHECK3-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
10751 // TCHECK3-NEXT:    [[TMP2:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
10752 // TCHECK3-NEXT:    [[CONV:%.*]] = sext i8 [[TMP2]] to i32
10753 // TCHECK3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[CONV]], 1
10754 // TCHECK3-NEXT:    [[CONV3:%.*]] = trunc i32 [[ADD2]] to i8
10755 // TCHECK3-NEXT:    store i8 [[CONV3]], ptr [[AAA_ADDR]], align 1
10756 // TCHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
10757 // TCHECK3-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
10758 // TCHECK3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1
10759 // TCHECK3-NEXT:    store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
10760 // TCHECK3-NEXT:    ret void
10761 //
10762 //
10763 // TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
10764 // TCHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
10765 // TCHECK3-NEXT:  entry:
10766 // TCHECK3-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
10767 // TCHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 4
10768 // TCHECK3-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
10769 // TCHECK3-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
10770 // TCHECK3-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
10771 // TCHECK3-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 4
10772 // TCHECK3-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
10773 // TCHECK3-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
10774 // TCHECK3-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
10775 // TCHECK3-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
10776 // TCHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
10777 // TCHECK3-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
10778 // TCHECK3-NEXT:    store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
10779 // TCHECK3-NEXT:    store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
10780 // TCHECK3-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
10781 // TCHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
10782 // TCHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
10783 // TCHECK3-NEXT:    [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
10784 // TCHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
10785 // TCHECK3-NEXT:    [[TMP4:%.*]] = call ptr @llvm.stacksave.p0()
10786 // TCHECK3-NEXT:    store ptr [[TMP4]], ptr [[SAVED_STACK]], align 4
10787 // TCHECK3-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
10788 // TCHECK3-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
10789 // TCHECK3-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
10790 // TCHECK3-NEXT:    store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
10791 // TCHECK3-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
10792 // TCHECK3-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
10793 // TCHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[VLA3]], ptr align 2 [[TMP3]], i32 [[TMP7]], i1 false)
10794 // TCHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B_ADDR]], align 4
10795 // TCHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP8]] to double
10796 // TCHECK3-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
10797 // TCHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
10798 // TCHECK3-NEXT:    store double [[ADD]], ptr [[A]], align 4
10799 // TCHECK3-NEXT:    [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
10800 // TCHECK3-NEXT:    [[TMP9:%.*]] = load double, ptr [[A4]], align 4
10801 // TCHECK3-NEXT:    [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00
10802 // TCHECK3-NEXT:    store double [[INC]], ptr [[A4]], align 4
10803 // TCHECK3-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
10804 // TCHECK3-NEXT:    [[TMP10:%.*]] = mul nsw i32 1, [[TMP2]]
10805 // TCHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA3]], i32 [[TMP10]]
10806 // TCHECK3-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
10807 // TCHECK3-NEXT:    store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2
10808 // TCHECK3-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
10809 // TCHECK3-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP11]])
10810 // TCHECK3-NEXT:    ret void
10811 //
10812 //
10813 // TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
10814 // TCHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
10815 // TCHECK3-NEXT:  entry:
10816 // TCHECK3-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
10817 // TCHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
10818 // TCHECK3-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
10819 // TCHECK3-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
10820 // TCHECK3-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
10821 // TCHECK3-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
10822 // TCHECK3-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
10823 // TCHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
10824 // TCHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B1]], ptr align 4 [[TMP0]], i32 40, i1 false)
10825 // TCHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
10826 // TCHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
10827 // TCHECK3-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
10828 // TCHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B1]], i32 0, i32 2
10829 // TCHECK3-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
10830 // TCHECK3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
10831 // TCHECK3-NEXT:    store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
10832 // TCHECK3-NEXT:    ret void
10833 //
10834 //
10835 // SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z3fooiPd
10836 // SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
10837 // SIMD-ONLY1-NEXT:  entry:
10838 // SIMD-ONLY1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
10839 // SIMD-ONLY1-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
10840 // SIMD-ONLY1-NEXT:    [[A:%.*]] = alloca i32, align 4
10841 // SIMD-ONLY1-NEXT:    [[AA:%.*]] = alloca i16, align 2
10842 // SIMD-ONLY1-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
10843 // SIMD-ONLY1-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
10844 // SIMD-ONLY1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
10845 // SIMD-ONLY1-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
10846 // SIMD-ONLY1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
10847 // SIMD-ONLY1-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
10848 // SIMD-ONLY1-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
10849 // SIMD-ONLY1-NEXT:    [[P:%.*]] = alloca ptr, align 64
10850 // SIMD-ONLY1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
10851 // SIMD-ONLY1-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
10852 // SIMD-ONLY1-NEXT:    store i32 0, ptr [[A]], align 4
10853 // SIMD-ONLY1-NEXT:    store i16 0, ptr [[AA]], align 2
10854 // SIMD-ONLY1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
10855 // SIMD-ONLY1-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
10856 // SIMD-ONLY1-NEXT:    [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
10857 // SIMD-ONLY1-NEXT:    store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
10858 // SIMD-ONLY1-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
10859 // SIMD-ONLY1-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
10860 // SIMD-ONLY1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
10861 // SIMD-ONLY1-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
10862 // SIMD-ONLY1-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
10863 // SIMD-ONLY1-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
10864 // SIMD-ONLY1-NEXT:    store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
10865 // SIMD-ONLY1-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
10866 // SIMD-ONLY1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
10867 // SIMD-ONLY1-NEXT:    store i32 [[TMP6]], ptr [[X]], align 4
10868 // SIMD-ONLY1-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
10869 // SIMD-ONLY1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
10870 // SIMD-ONLY1-NEXT:    store i32 [[TMP7]], ptr [[Y]], align 4
10871 // SIMD-ONLY1-NEXT:    store ptr [[A]], ptr [[P]], align 64
10872 // SIMD-ONLY1-NEXT:    [[TMP8:%.*]] = load i16, ptr [[AA]], align 2
10873 // SIMD-ONLY1-NEXT:    [[CONV:%.*]] = sext i16 [[TMP8]] to i32
10874 // SIMD-ONLY1-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
10875 // SIMD-ONLY1-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
10876 // SIMD-ONLY1-NEXT:    store i16 [[CONV2]], ptr [[AA]], align 2
10877 // SIMD-ONLY1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i64 0, i64 2
10878 // SIMD-ONLY1-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
10879 // SIMD-ONLY1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 3
10880 // SIMD-ONLY1-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
10881 // SIMD-ONLY1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i64 0, i64 1
10882 // SIMD-ONLY1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i64 0, i64 2
10883 // SIMD-ONLY1-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
10884 // SIMD-ONLY1-NEXT:    [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
10885 // SIMD-ONLY1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i64 [[TMP9]]
10886 // SIMD-ONLY1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i64 3
10887 // SIMD-ONLY1-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
10888 // SIMD-ONLY1-NEXT:    [[X8:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
10889 // SIMD-ONLY1-NEXT:    store i64 1, ptr [[X8]], align 8
10890 // SIMD-ONLY1-NEXT:    [[Y9:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
10891 // SIMD-ONLY1-NEXT:    store i8 1, ptr [[Y9]], align 8
10892 // SIMD-ONLY1-NEXT:    [[X10:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
10893 // SIMD-ONLY1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[X10]], align 4
10894 // SIMD-ONLY1-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
10895 // SIMD-ONLY1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
10896 // SIMD-ONLY1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP11]], i64 0
10897 // SIMD-ONLY1-NEXT:    store double [[CONV11]], ptr [[ARRAYIDX12]], align 8
10898 // SIMD-ONLY1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
10899 // SIMD-ONLY1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 0
10900 // SIMD-ONLY1-NEXT:    [[TMP13:%.*]] = load double, ptr [[ARRAYIDX13]], align 8
10901 // SIMD-ONLY1-NEXT:    [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
10902 // SIMD-ONLY1-NEXT:    store double [[INC]], ptr [[ARRAYIDX13]], align 8
10903 // SIMD-ONLY1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[A]], align 4
10904 // SIMD-ONLY1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
10905 // SIMD-ONLY1-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP15]])
10906 // SIMD-ONLY1-NEXT:    ret i32 [[TMP14]]
10907 //
10908 //
10909 // SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z3bariPd
10910 // SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
10911 // SIMD-ONLY1-NEXT:  entry:
10912 // SIMD-ONLY1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
10913 // SIMD-ONLY1-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
10914 // SIMD-ONLY1-NEXT:    [[A:%.*]] = alloca i32, align 4
10915 // SIMD-ONLY1-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
10916 // SIMD-ONLY1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
10917 // SIMD-ONLY1-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
10918 // SIMD-ONLY1-NEXT:    store i32 0, ptr [[A]], align 4
10919 // SIMD-ONLY1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
10920 // SIMD-ONLY1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
10921 // SIMD-ONLY1-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], ptr noundef [[TMP1]])
10922 // SIMD-ONLY1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
10923 // SIMD-ONLY1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
10924 // SIMD-ONLY1-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
10925 // SIMD-ONLY1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
10926 // SIMD-ONLY1-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
10927 // SIMD-ONLY1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
10928 // SIMD-ONLY1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
10929 // SIMD-ONLY1-NEXT:    store i32 [[ADD2]], ptr [[A]], align 4
10930 // SIMD-ONLY1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
10931 // SIMD-ONLY1-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
10932 // SIMD-ONLY1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
10933 // SIMD-ONLY1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
10934 // SIMD-ONLY1-NEXT:    store i32 [[ADD4]], ptr [[A]], align 4
10935 // SIMD-ONLY1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
10936 // SIMD-ONLY1-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
10937 // SIMD-ONLY1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
10938 // SIMD-ONLY1-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
10939 // SIMD-ONLY1-NEXT:    store i32 [[ADD6]], ptr [[A]], align 4
10940 // SIMD-ONLY1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
10941 // SIMD-ONLY1-NEXT:    ret i32 [[TMP9]]
10942 //
10943 //
10944 // SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
10945 // SIMD-ONLY1-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
10946 // SIMD-ONLY1-NEXT:  entry:
10947 // SIMD-ONLY1-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
10948 // SIMD-ONLY1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
10949 // SIMD-ONLY1-NEXT:    [[B:%.*]] = alloca i32, align 4
10950 // SIMD-ONLY1-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
10951 // SIMD-ONLY1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
10952 // SIMD-ONLY1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
10953 // SIMD-ONLY1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
10954 // SIMD-ONLY1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
10955 // SIMD-ONLY1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
10956 // SIMD-ONLY1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
10957 // SIMD-ONLY1-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
10958 // SIMD-ONLY1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
10959 // SIMD-ONLY1-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
10960 // SIMD-ONLY1-NEXT:    [[TMP3:%.*]] = call ptr @llvm.stacksave.p0()
10961 // SIMD-ONLY1-NEXT:    store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8
10962 // SIMD-ONLY1-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
10963 // SIMD-ONLY1-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
10964 // SIMD-ONLY1-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
10965 // SIMD-ONLY1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[B]], align 4
10966 // SIMD-ONLY1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
10967 // SIMD-ONLY1-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
10968 // SIMD-ONLY1-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
10969 // SIMD-ONLY1-NEXT:    store double [[ADD2]], ptr [[A]], align 8
10970 // SIMD-ONLY1-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
10971 // SIMD-ONLY1-NEXT:    [[TMP6:%.*]] = load double, ptr [[A3]], align 8
10972 // SIMD-ONLY1-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
10973 // SIMD-ONLY1-NEXT:    store double [[INC]], ptr [[A3]], align 8
10974 // SIMD-ONLY1-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
10975 // SIMD-ONLY1-NEXT:    [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
10976 // SIMD-ONLY1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP7]]
10977 // SIMD-ONLY1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
10978 // SIMD-ONLY1-NEXT:    store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
10979 // SIMD-ONLY1-NEXT:    [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
10980 // SIMD-ONLY1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP8]]
10981 // SIMD-ONLY1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i64 1
10982 // SIMD-ONLY1-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
10983 // SIMD-ONLY1-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
10984 // SIMD-ONLY1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[B]], align 4
10985 // SIMD-ONLY1-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
10986 // SIMD-ONLY1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
10987 // SIMD-ONLY1-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP11]])
10988 // SIMD-ONLY1-NEXT:    ret i32 [[ADD9]]
10989 //
10990 //
10991 // SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZL7fstatici
10992 // SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
10993 // SIMD-ONLY1-NEXT:  entry:
10994 // SIMD-ONLY1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
10995 // SIMD-ONLY1-NEXT:    [[A:%.*]] = alloca i32, align 4
10996 // SIMD-ONLY1-NEXT:    [[AAA:%.*]] = alloca i8, align 1
10997 // SIMD-ONLY1-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
10998 // SIMD-ONLY1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
10999 // SIMD-ONLY1-NEXT:    store i32 0, ptr [[A]], align 4
11000 // SIMD-ONLY1-NEXT:    store i8 0, ptr [[AAA]], align 1
11001 // SIMD-ONLY1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
11002 // SIMD-ONLY1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
11003 // SIMD-ONLY1-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
11004 // SIMD-ONLY1-NEXT:    [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
11005 // SIMD-ONLY1-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
11006 // SIMD-ONLY1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
11007 // SIMD-ONLY1-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
11008 // SIMD-ONLY1-NEXT:    store i8 [[CONV2]], ptr [[AAA]], align 1
11009 // SIMD-ONLY1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
11010 // SIMD-ONLY1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
11011 // SIMD-ONLY1-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
11012 // SIMD-ONLY1-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
11013 // SIMD-ONLY1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
11014 // SIMD-ONLY1-NEXT:    ret i32 [[TMP3]]
11015 //
11016 //
11017 // SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
11018 // SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
11019 // SIMD-ONLY1-NEXT:  entry:
11020 // SIMD-ONLY1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11021 // SIMD-ONLY1-NEXT:    [[A:%.*]] = alloca i32, align 4
11022 // SIMD-ONLY1-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
11023 // SIMD-ONLY1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11024 // SIMD-ONLY1-NEXT:    store i32 0, ptr [[A]], align 4
11025 // SIMD-ONLY1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
11026 // SIMD-ONLY1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
11027 // SIMD-ONLY1-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
11028 // SIMD-ONLY1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
11029 // SIMD-ONLY1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
11030 // SIMD-ONLY1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
11031 // SIMD-ONLY1-NEXT:    store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
11032 // SIMD-ONLY1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
11033 // SIMD-ONLY1-NEXT:    ret i32 [[TMP2]]
11034 //
11035 //
11036 // SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z3fooiPd
11037 // SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
11038 // SIMD-ONLY11-NEXT:  entry:
11039 // SIMD-ONLY11-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11040 // SIMD-ONLY11-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
11041 // SIMD-ONLY11-NEXT:    [[A:%.*]] = alloca i32, align 4
11042 // SIMD-ONLY11-NEXT:    [[AA:%.*]] = alloca i16, align 2
11043 // SIMD-ONLY11-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
11044 // SIMD-ONLY11-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
11045 // SIMD-ONLY11-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
11046 // SIMD-ONLY11-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
11047 // SIMD-ONLY11-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
11048 // SIMD-ONLY11-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
11049 // SIMD-ONLY11-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
11050 // SIMD-ONLY11-NEXT:    [[P:%.*]] = alloca ptr, align 64
11051 // SIMD-ONLY11-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11052 // SIMD-ONLY11-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
11053 // SIMD-ONLY11-NEXT:    store i32 0, ptr [[A]], align 4
11054 // SIMD-ONLY11-NEXT:    store i16 0, ptr [[AA]], align 2
11055 // SIMD-ONLY11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
11056 // SIMD-ONLY11-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
11057 // SIMD-ONLY11-NEXT:    [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
11058 // SIMD-ONLY11-NEXT:    store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
11059 // SIMD-ONLY11-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
11060 // SIMD-ONLY11-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
11061 // SIMD-ONLY11-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
11062 // SIMD-ONLY11-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
11063 // SIMD-ONLY11-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
11064 // SIMD-ONLY11-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
11065 // SIMD-ONLY11-NEXT:    store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
11066 // SIMD-ONLY11-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
11067 // SIMD-ONLY11-NEXT:    [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
11068 // SIMD-ONLY11-NEXT:    store i32 [[TMP6]], ptr [[X]], align 4
11069 // SIMD-ONLY11-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
11070 // SIMD-ONLY11-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
11071 // SIMD-ONLY11-NEXT:    store i32 [[TMP7]], ptr [[Y]], align 4
11072 // SIMD-ONLY11-NEXT:    store ptr [[A]], ptr [[P]], align 64
11073 // SIMD-ONLY11-NEXT:    [[TMP8:%.*]] = load i16, ptr [[AA]], align 2
11074 // SIMD-ONLY11-NEXT:    [[CONV:%.*]] = sext i16 [[TMP8]] to i32
11075 // SIMD-ONLY11-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
11076 // SIMD-ONLY11-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
11077 // SIMD-ONLY11-NEXT:    store i16 [[CONV2]], ptr [[AA]], align 2
11078 // SIMD-ONLY11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i64 0, i64 2
11079 // SIMD-ONLY11-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
11080 // SIMD-ONLY11-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 3
11081 // SIMD-ONLY11-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
11082 // SIMD-ONLY11-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i64 0, i64 1
11083 // SIMD-ONLY11-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i64 0, i64 2
11084 // SIMD-ONLY11-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
11085 // SIMD-ONLY11-NEXT:    [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
11086 // SIMD-ONLY11-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i64 [[TMP9]]
11087 // SIMD-ONLY11-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i64 3
11088 // SIMD-ONLY11-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
11089 // SIMD-ONLY11-NEXT:    [[X8:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
11090 // SIMD-ONLY11-NEXT:    store i64 1, ptr [[X8]], align 8
11091 // SIMD-ONLY11-NEXT:    [[Y9:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
11092 // SIMD-ONLY11-NEXT:    store i8 1, ptr [[Y9]], align 8
11093 // SIMD-ONLY11-NEXT:    [[X10:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
11094 // SIMD-ONLY11-NEXT:    [[TMP10:%.*]] = load i32, ptr [[X10]], align 4
11095 // SIMD-ONLY11-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
11096 // SIMD-ONLY11-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
11097 // SIMD-ONLY11-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP11]], i64 0
11098 // SIMD-ONLY11-NEXT:    store double [[CONV11]], ptr [[ARRAYIDX12]], align 8
11099 // SIMD-ONLY11-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
11100 // SIMD-ONLY11-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 0
11101 // SIMD-ONLY11-NEXT:    [[TMP13:%.*]] = load double, ptr [[ARRAYIDX13]], align 8
11102 // SIMD-ONLY11-NEXT:    [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
11103 // SIMD-ONLY11-NEXT:    store double [[INC]], ptr [[ARRAYIDX13]], align 8
11104 // SIMD-ONLY11-NEXT:    [[TMP14:%.*]] = load i32, ptr [[A]], align 4
11105 // SIMD-ONLY11-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
11106 // SIMD-ONLY11-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP15]])
11107 // SIMD-ONLY11-NEXT:    ret i32 [[TMP14]]
11108 //
11109 //
11110 // SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z3bariPd
11111 // SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
11112 // SIMD-ONLY11-NEXT:  entry:
11113 // SIMD-ONLY11-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11114 // SIMD-ONLY11-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
11115 // SIMD-ONLY11-NEXT:    [[A:%.*]] = alloca i32, align 4
11116 // SIMD-ONLY11-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
11117 // SIMD-ONLY11-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11118 // SIMD-ONLY11-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
11119 // SIMD-ONLY11-NEXT:    store i32 0, ptr [[A]], align 4
11120 // SIMD-ONLY11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
11121 // SIMD-ONLY11-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
11122 // SIMD-ONLY11-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], ptr noundef [[TMP1]])
11123 // SIMD-ONLY11-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
11124 // SIMD-ONLY11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
11125 // SIMD-ONLY11-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
11126 // SIMD-ONLY11-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
11127 // SIMD-ONLY11-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
11128 // SIMD-ONLY11-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
11129 // SIMD-ONLY11-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
11130 // SIMD-ONLY11-NEXT:    store i32 [[ADD2]], ptr [[A]], align 4
11131 // SIMD-ONLY11-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
11132 // SIMD-ONLY11-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
11133 // SIMD-ONLY11-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
11134 // SIMD-ONLY11-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
11135 // SIMD-ONLY11-NEXT:    store i32 [[ADD4]], ptr [[A]], align 4
11136 // SIMD-ONLY11-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
11137 // SIMD-ONLY11-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
11138 // SIMD-ONLY11-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
11139 // SIMD-ONLY11-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
11140 // SIMD-ONLY11-NEXT:    store i32 [[ADD6]], ptr [[A]], align 4
11141 // SIMD-ONLY11-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
11142 // SIMD-ONLY11-NEXT:    ret i32 [[TMP9]]
11143 //
11144 //
11145 // SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
11146 // SIMD-ONLY11-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
11147 // SIMD-ONLY11-NEXT:  entry:
11148 // SIMD-ONLY11-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
11149 // SIMD-ONLY11-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11150 // SIMD-ONLY11-NEXT:    [[B:%.*]] = alloca i32, align 4
11151 // SIMD-ONLY11-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 8
11152 // SIMD-ONLY11-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
11153 // SIMD-ONLY11-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
11154 // SIMD-ONLY11-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11155 // SIMD-ONLY11-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
11156 // SIMD-ONLY11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
11157 // SIMD-ONLY11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
11158 // SIMD-ONLY11-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
11159 // SIMD-ONLY11-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
11160 // SIMD-ONLY11-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
11161 // SIMD-ONLY11-NEXT:    [[TMP3:%.*]] = call ptr @llvm.stacksave.p0()
11162 // SIMD-ONLY11-NEXT:    store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8
11163 // SIMD-ONLY11-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
11164 // SIMD-ONLY11-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
11165 // SIMD-ONLY11-NEXT:    store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8
11166 // SIMD-ONLY11-NEXT:    [[TMP5:%.*]] = load i32, ptr [[B]], align 4
11167 // SIMD-ONLY11-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
11168 // SIMD-ONLY11-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
11169 // SIMD-ONLY11-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
11170 // SIMD-ONLY11-NEXT:    store double [[ADD2]], ptr [[A]], align 8
11171 // SIMD-ONLY11-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
11172 // SIMD-ONLY11-NEXT:    [[TMP6:%.*]] = load double, ptr [[A3]], align 8
11173 // SIMD-ONLY11-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
11174 // SIMD-ONLY11-NEXT:    store double [[INC]], ptr [[A3]], align 8
11175 // SIMD-ONLY11-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
11176 // SIMD-ONLY11-NEXT:    [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
11177 // SIMD-ONLY11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP7]]
11178 // SIMD-ONLY11-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1
11179 // SIMD-ONLY11-NEXT:    store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
11180 // SIMD-ONLY11-NEXT:    [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
11181 // SIMD-ONLY11-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP8]]
11182 // SIMD-ONLY11-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i64 1
11183 // SIMD-ONLY11-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
11184 // SIMD-ONLY11-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
11185 // SIMD-ONLY11-NEXT:    [[TMP10:%.*]] = load i32, ptr [[B]], align 4
11186 // SIMD-ONLY11-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
11187 // SIMD-ONLY11-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
11188 // SIMD-ONLY11-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP11]])
11189 // SIMD-ONLY11-NEXT:    ret i32 [[ADD9]]
11190 //
11191 //
11192 // SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZL7fstatici
11193 // SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
11194 // SIMD-ONLY11-NEXT:  entry:
11195 // SIMD-ONLY11-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11196 // SIMD-ONLY11-NEXT:    [[A:%.*]] = alloca i32, align 4
11197 // SIMD-ONLY11-NEXT:    [[AAA:%.*]] = alloca i8, align 1
11198 // SIMD-ONLY11-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
11199 // SIMD-ONLY11-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11200 // SIMD-ONLY11-NEXT:    store i32 0, ptr [[A]], align 4
11201 // SIMD-ONLY11-NEXT:    store i8 0, ptr [[AAA]], align 1
11202 // SIMD-ONLY11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
11203 // SIMD-ONLY11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
11204 // SIMD-ONLY11-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
11205 // SIMD-ONLY11-NEXT:    [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
11206 // SIMD-ONLY11-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
11207 // SIMD-ONLY11-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
11208 // SIMD-ONLY11-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
11209 // SIMD-ONLY11-NEXT:    store i8 [[CONV2]], ptr [[AAA]], align 1
11210 // SIMD-ONLY11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
11211 // SIMD-ONLY11-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
11212 // SIMD-ONLY11-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
11213 // SIMD-ONLY11-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
11214 // SIMD-ONLY11-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
11215 // SIMD-ONLY11-NEXT:    ret i32 [[TMP3]]
11216 //
11217 //
11218 // SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
11219 // SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
11220 // SIMD-ONLY11-NEXT:  entry:
11221 // SIMD-ONLY11-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11222 // SIMD-ONLY11-NEXT:    [[A:%.*]] = alloca i32, align 4
11223 // SIMD-ONLY11-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
11224 // SIMD-ONLY11-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11225 // SIMD-ONLY11-NEXT:    store i32 0, ptr [[A]], align 4
11226 // SIMD-ONLY11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
11227 // SIMD-ONLY11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
11228 // SIMD-ONLY11-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
11229 // SIMD-ONLY11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 2
11230 // SIMD-ONLY11-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
11231 // SIMD-ONLY11-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
11232 // SIMD-ONLY11-NEXT:    store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
11233 // SIMD-ONLY11-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
11234 // SIMD-ONLY11-NEXT:    ret i32 [[TMP2]]
11235 //
11236 //
11237 // SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z3fooiPd
11238 // SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
11239 // SIMD-ONLY12-NEXT:  entry:
11240 // SIMD-ONLY12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11241 // SIMD-ONLY12-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
11242 // SIMD-ONLY12-NEXT:    [[A:%.*]] = alloca i32, align 4
11243 // SIMD-ONLY12-NEXT:    [[AA:%.*]] = alloca i16, align 2
11244 // SIMD-ONLY12-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
11245 // SIMD-ONLY12-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
11246 // SIMD-ONLY12-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
11247 // SIMD-ONLY12-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
11248 // SIMD-ONLY12-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
11249 // SIMD-ONLY12-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
11250 // SIMD-ONLY12-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
11251 // SIMD-ONLY12-NEXT:    [[P:%.*]] = alloca ptr, align 64
11252 // SIMD-ONLY12-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11253 // SIMD-ONLY12-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
11254 // SIMD-ONLY12-NEXT:    store i32 0, ptr [[A]], align 4
11255 // SIMD-ONLY12-NEXT:    store i16 0, ptr [[AA]], align 2
11256 // SIMD-ONLY12-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
11257 // SIMD-ONLY12-NEXT:    [[TMP1:%.*]] = call ptr @llvm.stacksave.p0()
11258 // SIMD-ONLY12-NEXT:    store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4
11259 // SIMD-ONLY12-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
11260 // SIMD-ONLY12-NEXT:    store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
11261 // SIMD-ONLY12-NEXT:    [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
11262 // SIMD-ONLY12-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
11263 // SIMD-ONLY12-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
11264 // SIMD-ONLY12-NEXT:    store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
11265 // SIMD-ONLY12-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
11266 // SIMD-ONLY12-NEXT:    [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
11267 // SIMD-ONLY12-NEXT:    store i32 [[TMP4]], ptr [[X]], align 4
11268 // SIMD-ONLY12-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
11269 // SIMD-ONLY12-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
11270 // SIMD-ONLY12-NEXT:    store i32 [[TMP5]], ptr [[Y]], align 4
11271 // SIMD-ONLY12-NEXT:    store ptr [[A]], ptr [[P]], align 64
11272 // SIMD-ONLY12-NEXT:    [[TMP6:%.*]] = load i16, ptr [[AA]], align 2
11273 // SIMD-ONLY12-NEXT:    [[CONV:%.*]] = sext i16 [[TMP6]] to i32
11274 // SIMD-ONLY12-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
11275 // SIMD-ONLY12-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
11276 // SIMD-ONLY12-NEXT:    store i16 [[CONV2]], ptr [[AA]], align 2
11277 // SIMD-ONLY12-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i32 0, i32 2
11278 // SIMD-ONLY12-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
11279 // SIMD-ONLY12-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i32 3
11280 // SIMD-ONLY12-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
11281 // SIMD-ONLY12-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i32 0, i32 1
11282 // SIMD-ONLY12-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i32 0, i32 2
11283 // SIMD-ONLY12-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
11284 // SIMD-ONLY12-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]]
11285 // SIMD-ONLY12-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i32 [[TMP7]]
11286 // SIMD-ONLY12-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i32 3
11287 // SIMD-ONLY12-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
11288 // SIMD-ONLY12-NEXT:    [[X8:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
11289 // SIMD-ONLY12-NEXT:    store i64 1, ptr [[X8]], align 4
11290 // SIMD-ONLY12-NEXT:    [[Y9:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
11291 // SIMD-ONLY12-NEXT:    store i8 1, ptr [[Y9]], align 4
11292 // SIMD-ONLY12-NEXT:    [[X10:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
11293 // SIMD-ONLY12-NEXT:    [[TMP8:%.*]] = load i32, ptr [[X10]], align 4
11294 // SIMD-ONLY12-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double
11295 // SIMD-ONLY12-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
11296 // SIMD-ONLY12-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i32 0
11297 // SIMD-ONLY12-NEXT:    store double [[CONV11]], ptr [[ARRAYIDX12]], align 4
11298 // SIMD-ONLY12-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
11299 // SIMD-ONLY12-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i32 0
11300 // SIMD-ONLY12-NEXT:    [[TMP11:%.*]] = load double, ptr [[ARRAYIDX13]], align 4
11301 // SIMD-ONLY12-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
11302 // SIMD-ONLY12-NEXT:    store double [[INC]], ptr [[ARRAYIDX13]], align 4
11303 // SIMD-ONLY12-NEXT:    [[TMP12:%.*]] = load i32, ptr [[A]], align 4
11304 // SIMD-ONLY12-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
11305 // SIMD-ONLY12-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP13]])
11306 // SIMD-ONLY12-NEXT:    ret i32 [[TMP12]]
11307 //
11308 //
11309 // SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z3bariPd
11310 // SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
11311 // SIMD-ONLY12-NEXT:  entry:
11312 // SIMD-ONLY12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11313 // SIMD-ONLY12-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
11314 // SIMD-ONLY12-NEXT:    [[A:%.*]] = alloca i32, align 4
11315 // SIMD-ONLY12-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
11316 // SIMD-ONLY12-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11317 // SIMD-ONLY12-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
11318 // SIMD-ONLY12-NEXT:    store i32 0, ptr [[A]], align 4
11319 // SIMD-ONLY12-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
11320 // SIMD-ONLY12-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
11321 // SIMD-ONLY12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], ptr noundef [[TMP1]])
11322 // SIMD-ONLY12-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
11323 // SIMD-ONLY12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
11324 // SIMD-ONLY12-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
11325 // SIMD-ONLY12-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
11326 // SIMD-ONLY12-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
11327 // SIMD-ONLY12-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
11328 // SIMD-ONLY12-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
11329 // SIMD-ONLY12-NEXT:    store i32 [[ADD2]], ptr [[A]], align 4
11330 // SIMD-ONLY12-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
11331 // SIMD-ONLY12-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
11332 // SIMD-ONLY12-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
11333 // SIMD-ONLY12-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
11334 // SIMD-ONLY12-NEXT:    store i32 [[ADD4]], ptr [[A]], align 4
11335 // SIMD-ONLY12-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
11336 // SIMD-ONLY12-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
11337 // SIMD-ONLY12-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
11338 // SIMD-ONLY12-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
11339 // SIMD-ONLY12-NEXT:    store i32 [[ADD6]], ptr [[A]], align 4
11340 // SIMD-ONLY12-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
11341 // SIMD-ONLY12-NEXT:    ret i32 [[TMP9]]
11342 //
11343 //
11344 // SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
11345 // SIMD-ONLY12-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
11346 // SIMD-ONLY12-NEXT:  entry:
11347 // SIMD-ONLY12-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 4
11348 // SIMD-ONLY12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11349 // SIMD-ONLY12-NEXT:    [[B:%.*]] = alloca i32, align 4
11350 // SIMD-ONLY12-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
11351 // SIMD-ONLY12-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
11352 // SIMD-ONLY12-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
11353 // SIMD-ONLY12-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11354 // SIMD-ONLY12-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
11355 // SIMD-ONLY12-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
11356 // SIMD-ONLY12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
11357 // SIMD-ONLY12-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
11358 // SIMD-ONLY12-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
11359 // SIMD-ONLY12-NEXT:    [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
11360 // SIMD-ONLY12-NEXT:    store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4
11361 // SIMD-ONLY12-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
11362 // SIMD-ONLY12-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
11363 // SIMD-ONLY12-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
11364 // SIMD-ONLY12-NEXT:    [[TMP4:%.*]] = load i32, ptr [[B]], align 4
11365 // SIMD-ONLY12-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP4]] to double
11366 // SIMD-ONLY12-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
11367 // SIMD-ONLY12-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
11368 // SIMD-ONLY12-NEXT:    store double [[ADD2]], ptr [[A]], align 4
11369 // SIMD-ONLY12-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
11370 // SIMD-ONLY12-NEXT:    [[TMP5:%.*]] = load double, ptr [[A3]], align 4
11371 // SIMD-ONLY12-NEXT:    [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00
11372 // SIMD-ONLY12-NEXT:    store double [[INC]], ptr [[A3]], align 4
11373 // SIMD-ONLY12-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
11374 // SIMD-ONLY12-NEXT:    [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]]
11375 // SIMD-ONLY12-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP6]]
11376 // SIMD-ONLY12-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
11377 // SIMD-ONLY12-NEXT:    store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
11378 // SIMD-ONLY12-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]]
11379 // SIMD-ONLY12-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP7]]
11380 // SIMD-ONLY12-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i32 1
11381 // SIMD-ONLY12-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
11382 // SIMD-ONLY12-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
11383 // SIMD-ONLY12-NEXT:    [[TMP9:%.*]] = load i32, ptr [[B]], align 4
11384 // SIMD-ONLY12-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
11385 // SIMD-ONLY12-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
11386 // SIMD-ONLY12-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP10]])
11387 // SIMD-ONLY12-NEXT:    ret i32 [[ADD9]]
11388 //
11389 //
11390 // SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZL7fstatici
11391 // SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
11392 // SIMD-ONLY12-NEXT:  entry:
11393 // SIMD-ONLY12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11394 // SIMD-ONLY12-NEXT:    [[A:%.*]] = alloca i32, align 4
11395 // SIMD-ONLY12-NEXT:    [[AAA:%.*]] = alloca i8, align 1
11396 // SIMD-ONLY12-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
11397 // SIMD-ONLY12-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11398 // SIMD-ONLY12-NEXT:    store i32 0, ptr [[A]], align 4
11399 // SIMD-ONLY12-NEXT:    store i8 0, ptr [[AAA]], align 1
11400 // SIMD-ONLY12-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
11401 // SIMD-ONLY12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
11402 // SIMD-ONLY12-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
11403 // SIMD-ONLY12-NEXT:    [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
11404 // SIMD-ONLY12-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
11405 // SIMD-ONLY12-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
11406 // SIMD-ONLY12-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
11407 // SIMD-ONLY12-NEXT:    store i8 [[CONV2]], ptr [[AAA]], align 1
11408 // SIMD-ONLY12-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
11409 // SIMD-ONLY12-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
11410 // SIMD-ONLY12-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
11411 // SIMD-ONLY12-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
11412 // SIMD-ONLY12-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
11413 // SIMD-ONLY12-NEXT:    ret i32 [[TMP3]]
11414 //
11415 //
11416 // SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
11417 // SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
11418 // SIMD-ONLY12-NEXT:  entry:
11419 // SIMD-ONLY12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11420 // SIMD-ONLY12-NEXT:    [[A:%.*]] = alloca i32, align 4
11421 // SIMD-ONLY12-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
11422 // SIMD-ONLY12-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11423 // SIMD-ONLY12-NEXT:    store i32 0, ptr [[A]], align 4
11424 // SIMD-ONLY12-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
11425 // SIMD-ONLY12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
11426 // SIMD-ONLY12-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
11427 // SIMD-ONLY12-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
11428 // SIMD-ONLY12-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
11429 // SIMD-ONLY12-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
11430 // SIMD-ONLY12-NEXT:    store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
11431 // SIMD-ONLY12-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
11432 // SIMD-ONLY12-NEXT:    ret i32 [[TMP2]]
11433 //
11434 //
11435 // SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z3fooiPd
11436 // SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
11437 // SIMD-ONLY13-NEXT:  entry:
11438 // SIMD-ONLY13-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11439 // SIMD-ONLY13-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
11440 // SIMD-ONLY13-NEXT:    [[A:%.*]] = alloca i32, align 4
11441 // SIMD-ONLY13-NEXT:    [[AA:%.*]] = alloca i16, align 2
11442 // SIMD-ONLY13-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
11443 // SIMD-ONLY13-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
11444 // SIMD-ONLY13-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
11445 // SIMD-ONLY13-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
11446 // SIMD-ONLY13-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
11447 // SIMD-ONLY13-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
11448 // SIMD-ONLY13-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
11449 // SIMD-ONLY13-NEXT:    [[P:%.*]] = alloca ptr, align 64
11450 // SIMD-ONLY13-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11451 // SIMD-ONLY13-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
11452 // SIMD-ONLY13-NEXT:    store i32 0, ptr [[A]], align 4
11453 // SIMD-ONLY13-NEXT:    store i16 0, ptr [[AA]], align 2
11454 // SIMD-ONLY13-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
11455 // SIMD-ONLY13-NEXT:    [[TMP1:%.*]] = call ptr @llvm.stacksave.p0()
11456 // SIMD-ONLY13-NEXT:    store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4
11457 // SIMD-ONLY13-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
11458 // SIMD-ONLY13-NEXT:    store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
11459 // SIMD-ONLY13-NEXT:    [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
11460 // SIMD-ONLY13-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
11461 // SIMD-ONLY13-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
11462 // SIMD-ONLY13-NEXT:    store i32 [[TMP2]], ptr [[__VLA_EXPR1]], align 4
11463 // SIMD-ONLY13-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
11464 // SIMD-ONLY13-NEXT:    [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
11465 // SIMD-ONLY13-NEXT:    store i32 [[TMP4]], ptr [[X]], align 4
11466 // SIMD-ONLY13-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 1
11467 // SIMD-ONLY13-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
11468 // SIMD-ONLY13-NEXT:    store i32 [[TMP5]], ptr [[Y]], align 4
11469 // SIMD-ONLY13-NEXT:    store ptr [[A]], ptr [[P]], align 64
11470 // SIMD-ONLY13-NEXT:    [[TMP6:%.*]] = load i16, ptr [[AA]], align 2
11471 // SIMD-ONLY13-NEXT:    [[CONV:%.*]] = sext i16 [[TMP6]] to i32
11472 // SIMD-ONLY13-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
11473 // SIMD-ONLY13-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
11474 // SIMD-ONLY13-NEXT:    store i16 [[CONV2]], ptr [[AA]], align 2
11475 // SIMD-ONLY13-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[B]], i32 0, i32 2
11476 // SIMD-ONLY13-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX]], align 4
11477 // SIMD-ONLY13-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[VLA]], i32 3
11478 // SIMD-ONLY13-NEXT:    store float 1.000000e+00, ptr [[ARRAYIDX3]], align 4
11479 // SIMD-ONLY13-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C]], i32 0, i32 1
11480 // SIMD-ONLY13-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX4]], i32 0, i32 2
11481 // SIMD-ONLY13-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX5]], align 8
11482 // SIMD-ONLY13-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]]
11483 // SIMD-ONLY13-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[VLA1]], i32 [[TMP7]]
11484 // SIMD-ONLY13-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX6]], i32 3
11485 // SIMD-ONLY13-NEXT:    store double 1.000000e+00, ptr [[ARRAYIDX7]], align 8
11486 // SIMD-ONLY13-NEXT:    [[X8:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 0
11487 // SIMD-ONLY13-NEXT:    store i64 1, ptr [[X8]], align 4
11488 // SIMD-ONLY13-NEXT:    [[Y9:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D]], i32 0, i32 1
11489 // SIMD-ONLY13-NEXT:    store i8 1, ptr [[Y9]], align 4
11490 // SIMD-ONLY13-NEXT:    [[X10:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0]], ptr [[E]], i32 0, i32 0
11491 // SIMD-ONLY13-NEXT:    [[TMP8:%.*]] = load i32, ptr [[X10]], align 4
11492 // SIMD-ONLY13-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double
11493 // SIMD-ONLY13-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
11494 // SIMD-ONLY13-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i32 0
11495 // SIMD-ONLY13-NEXT:    store double [[CONV11]], ptr [[ARRAYIDX12]], align 4
11496 // SIMD-ONLY13-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
11497 // SIMD-ONLY13-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i32 0
11498 // SIMD-ONLY13-NEXT:    [[TMP11:%.*]] = load double, ptr [[ARRAYIDX13]], align 4
11499 // SIMD-ONLY13-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
11500 // SIMD-ONLY13-NEXT:    store double [[INC]], ptr [[ARRAYIDX13]], align 4
11501 // SIMD-ONLY13-NEXT:    [[TMP12:%.*]] = load i32, ptr [[A]], align 4
11502 // SIMD-ONLY13-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
11503 // SIMD-ONLY13-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP13]])
11504 // SIMD-ONLY13-NEXT:    ret i32 [[TMP12]]
11505 //
11506 //
11507 // SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z3bariPd
11508 // SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]], ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
11509 // SIMD-ONLY13-NEXT:  entry:
11510 // SIMD-ONLY13-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11511 // SIMD-ONLY13-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 4
11512 // SIMD-ONLY13-NEXT:    [[A:%.*]] = alloca i32, align 4
11513 // SIMD-ONLY13-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
11514 // SIMD-ONLY13-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11515 // SIMD-ONLY13-NEXT:    store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
11516 // SIMD-ONLY13-NEXT:    store i32 0, ptr [[A]], align 4
11517 // SIMD-ONLY13-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
11518 // SIMD-ONLY13-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
11519 // SIMD-ONLY13-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], ptr noundef [[TMP1]])
11520 // SIMD-ONLY13-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
11521 // SIMD-ONLY13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
11522 // SIMD-ONLY13-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
11523 // SIMD-ONLY13-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
11524 // SIMD-ONLY13-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
11525 // SIMD-ONLY13-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
11526 // SIMD-ONLY13-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
11527 // SIMD-ONLY13-NEXT:    store i32 [[ADD2]], ptr [[A]], align 4
11528 // SIMD-ONLY13-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
11529 // SIMD-ONLY13-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
11530 // SIMD-ONLY13-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4
11531 // SIMD-ONLY13-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
11532 // SIMD-ONLY13-NEXT:    store i32 [[ADD4]], ptr [[A]], align 4
11533 // SIMD-ONLY13-NEXT:    [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
11534 // SIMD-ONLY13-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
11535 // SIMD-ONLY13-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4
11536 // SIMD-ONLY13-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
11537 // SIMD-ONLY13-NEXT:    store i32 [[ADD6]], ptr [[A]], align 4
11538 // SIMD-ONLY13-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
11539 // SIMD-ONLY13-NEXT:    ret i32 [[TMP9]]
11540 //
11541 //
11542 // SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
11543 // SIMD-ONLY13-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
11544 // SIMD-ONLY13-NEXT:  entry:
11545 // SIMD-ONLY13-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 4
11546 // SIMD-ONLY13-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11547 // SIMD-ONLY13-NEXT:    [[B:%.*]] = alloca i32, align 4
11548 // SIMD-ONLY13-NEXT:    [[SAVED_STACK:%.*]] = alloca ptr, align 4
11549 // SIMD-ONLY13-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
11550 // SIMD-ONLY13-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
11551 // SIMD-ONLY13-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11552 // SIMD-ONLY13-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
11553 // SIMD-ONLY13-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
11554 // SIMD-ONLY13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
11555 // SIMD-ONLY13-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
11556 // SIMD-ONLY13-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
11557 // SIMD-ONLY13-NEXT:    [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
11558 // SIMD-ONLY13-NEXT:    store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4
11559 // SIMD-ONLY13-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
11560 // SIMD-ONLY13-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
11561 // SIMD-ONLY13-NEXT:    store i32 [[TMP1]], ptr [[__VLA_EXPR0]], align 4
11562 // SIMD-ONLY13-NEXT:    [[TMP4:%.*]] = load i32, ptr [[B]], align 4
11563 // SIMD-ONLY13-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP4]] to double
11564 // SIMD-ONLY13-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
11565 // SIMD-ONLY13-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
11566 // SIMD-ONLY13-NEXT:    store double [[ADD2]], ptr [[A]], align 4
11567 // SIMD-ONLY13-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
11568 // SIMD-ONLY13-NEXT:    [[TMP5:%.*]] = load double, ptr [[A3]], align 4
11569 // SIMD-ONLY13-NEXT:    [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00
11570 // SIMD-ONLY13-NEXT:    store double [[INC]], ptr [[A3]], align 4
11571 // SIMD-ONLY13-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
11572 // SIMD-ONLY13-NEXT:    [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]]
11573 // SIMD-ONLY13-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP6]]
11574 // SIMD-ONLY13-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1
11575 // SIMD-ONLY13-NEXT:    store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2
11576 // SIMD-ONLY13-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]]
11577 // SIMD-ONLY13-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP7]]
11578 // SIMD-ONLY13-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX6]], i32 1
11579 // SIMD-ONLY13-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
11580 // SIMD-ONLY13-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
11581 // SIMD-ONLY13-NEXT:    [[TMP9:%.*]] = load i32, ptr [[B]], align 4
11582 // SIMD-ONLY13-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
11583 // SIMD-ONLY13-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4
11584 // SIMD-ONLY13-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP10]])
11585 // SIMD-ONLY13-NEXT:    ret i32 [[ADD9]]
11586 //
11587 //
11588 // SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZL7fstatici
11589 // SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
11590 // SIMD-ONLY13-NEXT:  entry:
11591 // SIMD-ONLY13-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11592 // SIMD-ONLY13-NEXT:    [[A:%.*]] = alloca i32, align 4
11593 // SIMD-ONLY13-NEXT:    [[AAA:%.*]] = alloca i8, align 1
11594 // SIMD-ONLY13-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
11595 // SIMD-ONLY13-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11596 // SIMD-ONLY13-NEXT:    store i32 0, ptr [[A]], align 4
11597 // SIMD-ONLY13-NEXT:    store i8 0, ptr [[AAA]], align 1
11598 // SIMD-ONLY13-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
11599 // SIMD-ONLY13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
11600 // SIMD-ONLY13-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
11601 // SIMD-ONLY13-NEXT:    [[TMP1:%.*]] = load i8, ptr [[AAA]], align 1
11602 // SIMD-ONLY13-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
11603 // SIMD-ONLY13-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
11604 // SIMD-ONLY13-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
11605 // SIMD-ONLY13-NEXT:    store i8 [[CONV2]], ptr [[AAA]], align 1
11606 // SIMD-ONLY13-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
11607 // SIMD-ONLY13-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
11608 // SIMD-ONLY13-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
11609 // SIMD-ONLY13-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
11610 // SIMD-ONLY13-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
11611 // SIMD-ONLY13-NEXT:    ret i32 [[TMP3]]
11612 //
11613 //
11614 // SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
11615 // SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
11616 // SIMD-ONLY13-NEXT:  entry:
11617 // SIMD-ONLY13-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
11618 // SIMD-ONLY13-NEXT:    [[A:%.*]] = alloca i32, align 4
11619 // SIMD-ONLY13-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
11620 // SIMD-ONLY13-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
11621 // SIMD-ONLY13-NEXT:    store i32 0, ptr [[A]], align 4
11622 // SIMD-ONLY13-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
11623 // SIMD-ONLY13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
11624 // SIMD-ONLY13-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
11625 // SIMD-ONLY13-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 2
11626 // SIMD-ONLY13-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
11627 // SIMD-ONLY13-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
11628 // SIMD-ONLY13-NEXT:    store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4
11629 // SIMD-ONLY13-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
11630 // SIMD-ONLY13-NEXT:    ret i32 [[TMP2]]
11631 //
11632