; xref: /llvm-project/llvm/test/Transforms/OpenMP/custom_state_machines.ll (revision 07ed8187acc31ac3f4779da452864a29d48799ac)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
; RUN: opt --mtriple=nvptx64--         -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU-DISABLED
; RUN: opt --mtriple=nvptx64--         -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX-DISABLED

;; void p0(void);
;; void p1(void);
;; int unknown(void);
;; void unknown_pure(void) __attribute__((pure));
;; [[omp::assume("omp_no_openmp")]] void unknown_no_openmp(void);
;;
;; int G;
;; void no_parallel_region_in_here(void) {
;; #pragma omp single
;;   G = 0;
;; }
;;
;; void no_state_machine_needed() {
;; #pragma omp target teams
;;   {
;;     no_parallel_region_in_here();
;;     unknown_no_openmp();
;;   }
;; }
;;
;; void simple_state_machine() {
;; #pragma omp target teams
;;   {
;;     unknown_no_openmp();
;; #pragma omp parallel
;;     { p0(); }
;;     no_parallel_region_in_here();
;; #pragma omp parallel
;;     { p1(); }
;;   }
;; }
;;
;; void simple_state_machine_interprocedural_after(void);
;; void simple_state_machine_interprocedural_before(void) {
;; #pragma omp parallel
;;   { p0(); }
;; }
;; void simple_state_machine_interprocedural() {
;; #pragma omp target teams
;;   {
;;     unknown_no_openmp();
;;     simple_state_machine_interprocedural_before();
;;     no_parallel_region_in_here();
;; #pragma omp parallel
;;     { p1(); }
;;     simple_state_machine_interprocedural_after();
;;   }
;; }
;; void simple_state_machine_interprocedural_after(void) {
;; #pragma omp parallel
;;   { p0(); }
;; }
;;
;; void simple_state_machine_with_fallback() {
;; #pragma omp target teams
;;   {
;; #pragma omp parallel
;;     { p0(); }
;;     unknown();
;; #pragma omp parallel
;;     { p1(); }
;;   }
;; }
;;
;; void simple_state_machine_no_openmp_attr() {
;; #pragma omp target teams
;;   {
;; #pragma omp parallel
;;     { p0(); }
;;     unknown_no_openmp();
;; #pragma omp parallel
;;     { p1(); }
;;   }
;; }
;;
;; void simple_state_machine_pure() {
;; #pragma omp target teams
;;   {
;;     unknown_no_openmp();
;; #pragma omp parallel
;;     { p0(); }
;;     unknown_pure();
;; #pragma omp parallel
;;     { p1(); }
;;   }
;; }
;;
;; int omp_get_thread_num();
;; void simple_state_machine_interprocedural_nested_recursive_after(int);
;; void simple_state_machine_interprocedural_nested_recursive_after_after(void);
;; void simple_state_machine_interprocedural_nested_recursive() {
;; #pragma omp target teams
;;   {
;;     simple_state_machine_interprocedural_nested_recursive_after(
;;         omp_get_thread_num());
;;   }
;; }
;;
;; void simple_state_machine_interprocedural_nested_recursive_after(int a) {
;;   if (a == 0)
;;     return;
;;   simple_state_machine_interprocedural_nested_recursive_after(a - 1);
;;   simple_state_machine_interprocedural_nested_recursive_after_after();
;; }
;; void simple_state_machine_interprocedural_nested_recursive_after_after(void) {
;; #pragma omp parallel
;;   { p0(); }
;; }
;;
;; __attribute__((weak)) void weak_callee_empty(void) {}
;; void no_state_machine_weak_callee() {
;; #pragma omp target teams
;;   { weak_callee_empty(); }
;; }

122%struct.ident_t = type { i32, i32, i32, i32, ptr }
123%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
124%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
125
126@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
127@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
128@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
129@G = external global i32, align 4
130@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8
131
132@__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
133@__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
134@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
135@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
136@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
137@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
138@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
139@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
140
141define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 {
142entry:
143  %.zero.addr = alloca i32, align 4
144  %.threadid_temp. = alloca i32, align 4
145  store i32 0, ptr %.zero.addr, align 4
146  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr %dyn)
147  %exec_user_code = icmp eq i32 %0, -1
148  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
149
150user_code.entry:                                  ; preds = %entry
151  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
152  store i32 %1, ptr %.threadid_temp., align 4
153  call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3
154  call void @__kmpc_target_deinit()
155  ret void
156
157worker.exit:                                      ; preds = %entry
158  ret void
159}
160
161; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
162define weak i32 @__kmpc_target_init(ptr, ptr) {
163  ret i32 0
164}
165
166define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
167entry:
168  %.global_tid..addr = alloca ptr, align 8
169  %.bound_tid..addr = alloca ptr, align 8
170  store ptr %.global_tid., ptr %.global_tid..addr, align 8
171  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
172  call void @no_parallel_region_in_here() #7
173  call void @unknown_no_openmp() #8
174  ret void
175}
176
177define hidden void @no_parallel_region_in_here() #1 {
178entry:
179  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
180  %1 = call i32 @__kmpc_single(ptr @2, i32 %0)
181  %2 = icmp ne i32 %1, 0
182  br i1 %2, label %omp_if.then, label %omp_if.end
183
184omp_if.then:                                      ; preds = %entry
185  store i32 0, ptr @G, align 4
186  call void @__kmpc_end_single(ptr @2, i32 %0)
187  br label %omp_if.end
188
189omp_if.end:                                       ; preds = %omp_if.then, %entry
190  call void @__kmpc_barrier(ptr @3, i32 %0)
191  ret void
192}
193
194declare void @unknown_no_openmp() #2
195
196declare i32 @__kmpc_global_thread_num(ptr) #3
197
198declare void @__kmpc_target_deinit()
199
200define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 {
201entry:
202  %.zero.addr = alloca i32, align 4
203  %.threadid_temp. = alloca i32, align 4
204  store i32 0, ptr %.zero.addr, align 4
205  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr %dyn)
206  %exec_user_code = icmp eq i32 %0, -1
207  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
208
209user_code.entry:                                  ; preds = %entry
210  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
211  store i32 %1, ptr %.threadid_temp., align 4
212  call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3
213  call void @__kmpc_target_deinit()
214  ret void
215
216worker.exit:                                      ; preds = %entry
217  ret void
218}
219
220define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
221entry:
222  %.global_tid..addr = alloca ptr, align 8
223  %.bound_tid..addr = alloca ptr, align 8
224  %captured_vars_addrs = alloca [0 x ptr], align 8
225  %captured_vars_addrs1 = alloca [0 x ptr], align 8
226  store ptr %.global_tid., ptr %.global_tid..addr, align 8
227  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
228  call void @unknown_no_openmp() #8
229  %0 = load ptr, ptr %.global_tid..addr, align 8
230  %1 = load i32, ptr %0, align 4
231  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs, i64 0)
232  call void @no_parallel_region_in_here() #7
233  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs1, i64 0)
234  ret void
235}
236
237define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
238entry:
239  %.global_tid..addr = alloca ptr, align 8
240  %.bound_tid..addr = alloca ptr, align 8
241  store ptr %.global_tid., ptr %.global_tid..addr, align 8
242  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
243  call void @p0() #7
244  ret void
245}
246
247declare void @p0() #4
248
249define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 {
250entry:
251  %.addr = alloca i16, align 2
252  %.addr1 = alloca i32, align 4
253  %.zero.addr = alloca i32, align 4
254  %global_args = alloca ptr, align 8
255  store i32 0, ptr %.zero.addr, align 4
256  store i16 %0, ptr %.addr, align 2
257  store i32 %1, ptr %.addr1, align 4
258  call void @__kmpc_get_shared_variables(ptr %global_args)
259  call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr) #3
260  ret void
261}
262
263declare void @__kmpc_get_shared_variables(ptr)
264
265declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
266
267define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
268entry:
269  %.global_tid..addr = alloca ptr, align 8
270  %.bound_tid..addr = alloca ptr, align 8
271  store ptr %.global_tid., ptr %.global_tid..addr, align 8
272  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
273  call void @p1() #7
274  ret void
275}
276
277declare void @p1() #4
278
279define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #0 {
280entry:
281  %.addr = alloca i16, align 2
282  %.addr1 = alloca i32, align 4
283  %.zero.addr = alloca i32, align 4
284  %global_args = alloca ptr, align 8
285  store i32 0, ptr %.zero.addr, align 4
286  store i16 %0, ptr %.addr, align 2
287  store i32 %1, ptr %.addr1, align 4
288  call void @__kmpc_get_shared_variables(ptr %global_args)
289  call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #3
290  ret void
291}
292
293define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 {
294entry:
295  %.zero.addr = alloca i32, align 4
296  %.threadid_temp. = alloca i32, align 4
297  store i32 0, ptr %.zero.addr, align 4
298  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr %dyn)
299  %exec_user_code = icmp eq i32 %0, -1
300  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
301
302user_code.entry:                                  ; preds = %entry
303  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
304  store i32 %1, ptr %.threadid_temp., align 4
305  call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3
306  call void @__kmpc_target_deinit()
307  ret void
308
309worker.exit:                                      ; preds = %entry
310  ret void
311}
312
313define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
314entry:
315  %.global_tid..addr = alloca ptr, align 8
316  %.bound_tid..addr = alloca ptr, align 8
317  %captured_vars_addrs = alloca [0 x ptr], align 8
318  store ptr %.global_tid., ptr %.global_tid..addr, align 8
319  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
320  call void @unknown_no_openmp() #8
321  call void @simple_state_machine_interprocedural_before() #7
322  call void @no_parallel_region_in_here() #7
323  %0 = load ptr, ptr %.global_tid..addr, align 8
324  %1 = load i32, ptr %0, align 4
325  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 0)
326  call void @simple_state_machine_interprocedural_after() #7
327  ret void
328}
329
330define hidden void @simple_state_machine_interprocedural_before() #1 {
331entry:
332  %captured_vars_addrs = alloca [0 x ptr], align 8
333  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
334  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs, i64 0)
335  ret void
336}
337
338define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
339entry:
340  %.global_tid..addr = alloca ptr, align 8
341  %.bound_tid..addr = alloca ptr, align 8
342  store ptr %.global_tid., ptr %.global_tid..addr, align 8
343  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
344  call void @p1() #7
345  ret void
346}
347
348define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #0 {
349entry:
350  %.addr = alloca i16, align 2
351  %.addr1 = alloca i32, align 4
352  %.zero.addr = alloca i32, align 4
353  %global_args = alloca ptr, align 8
354  store i32 0, ptr %.zero.addr, align 4
355  store i16 %0, ptr %.addr, align 2
356  store i32 %1, ptr %.addr1, align 4
357  call void @__kmpc_get_shared_variables(ptr %global_args)
358  call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr) #3
359  ret void
360}
361
362define hidden void @simple_state_machine_interprocedural_after() #1 {
363entry:
364  %captured_vars_addrs = alloca [0 x ptr], align 8
365  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
366  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs, i64 0)
367  ret void
368}
369
370define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 {
371entry:
372  %.zero.addr = alloca i32, align 4
373  %.threadid_temp. = alloca i32, align 4
374  store i32 0, ptr %.zero.addr, align 4
375  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr %dyn)
376  %exec_user_code = icmp eq i32 %0, -1
377  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
378
379user_code.entry:                                  ; preds = %entry
380  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
381  store i32 %1, ptr %.threadid_temp., align 4
382  call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3
383  call void @__kmpc_target_deinit()
384  ret void
385
386worker.exit:                                      ; preds = %entry
387  ret void
388}
389
390define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
391entry:
392  %.global_tid..addr = alloca ptr, align 8
393  %.bound_tid..addr = alloca ptr, align 8
394  %captured_vars_addrs = alloca [0 x ptr], align 8
395  %captured_vars_addrs1 = alloca [0 x ptr], align 8
396  store ptr %.global_tid., ptr %.global_tid..addr, align 8
397  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
398  %0 = load ptr, ptr %.global_tid..addr, align 8
399  %1 = load i32, ptr %0, align 4
400  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 0)
401  %call = call i32 @unknown() #7
402  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr %captured_vars_addrs1, i64 0)
403  ret void
404}
405
406define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
407entry:
408  %.global_tid..addr = alloca ptr, align 8
409  %.bound_tid..addr = alloca ptr, align 8
410  store ptr %.global_tid., ptr %.global_tid..addr, align 8
411  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
412  call void @p0() #7
413  ret void
414}
415
416define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #0 {
417entry:
418  %.addr = alloca i16, align 2
419  %.addr1 = alloca i32, align 4
420  %.zero.addr = alloca i32, align 4
421  %global_args = alloca ptr, align 8
422  store i32 0, ptr %.zero.addr, align 4
423  store i16 %0, ptr %.addr, align 2
424  store i32 %1, ptr %.addr1, align 4
425  call void @__kmpc_get_shared_variables(ptr %global_args)
426  call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr) #3
427  ret void
428}
429
430declare i32 @unknown() #4
431
432define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
433entry:
434  %.global_tid..addr = alloca ptr, align 8
435  %.bound_tid..addr = alloca ptr, align 8
436  store ptr %.global_tid., ptr %.global_tid..addr, align 8
437  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
438  call void @p1() #7
439  ret void
440}
441
442define internal void @__omp_outlined__8_wrapper(i16 zeroext %0, i32 %1) #0 {
443entry:
444  %.addr = alloca i16, align 2
445  %.addr1 = alloca i32, align 4
446  %.zero.addr = alloca i32, align 4
447  %global_args = alloca ptr, align 8
448  store i32 0, ptr %.zero.addr, align 4
449  store i16 %0, ptr %.addr, align 2
450  store i32 %1, ptr %.addr1, align 4
451  call void @__kmpc_get_shared_variables(ptr %global_args)
452  call void @__omp_outlined__8(ptr %.addr1, ptr %.zero.addr) #3
453  ret void
454}
455
456define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 {
457entry:
458  %.zero.addr = alloca i32, align 4
459  %.threadid_temp. = alloca i32, align 4
460  store i32 0, ptr %.zero.addr, align 4
461  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr %dyn)
462  %exec_user_code = icmp eq i32 %0, -1
463  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
464
465user_code.entry:                                  ; preds = %entry
466  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
467  store i32 %1, ptr %.threadid_temp., align 4
468  call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3
469  call void @__kmpc_target_deinit()
470  ret void
471
472worker.exit:                                      ; preds = %entry
473  ret void
474}
475
476define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
477entry:
478  %.global_tid..addr = alloca ptr, align 8
479  %.bound_tid..addr = alloca ptr, align 8
480  %captured_vars_addrs = alloca [0 x ptr], align 8
481  %captured_vars_addrs1 = alloca [0 x ptr], align 8
482  store ptr %.global_tid., ptr %.global_tid..addr, align 8
483  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
484  %0 = load ptr, ptr %.global_tid..addr, align 8
485  %1 = load i32, ptr %0, align 4
486  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr %captured_vars_addrs, i64 0)
487  call void @unknown_no_openmp() #8
488  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr %captured_vars_addrs1, i64 0)
489  ret void
490}
491
492define internal void @__omp_outlined__10(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
493entry:
494  %.global_tid..addr = alloca ptr, align 8
495  %.bound_tid..addr = alloca ptr, align 8
496  store ptr %.global_tid., ptr %.global_tid..addr, align 8
497  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
498  call void @p0() #7
499  ret void
500}
501
502define internal void @__omp_outlined__10_wrapper(i16 zeroext %0, i32 %1) #0 {
503entry:
504  %.addr = alloca i16, align 2
505  %.addr1 = alloca i32, align 4
506  %.zero.addr = alloca i32, align 4
507  %global_args = alloca ptr, align 8
508  store i32 0, ptr %.zero.addr, align 4
509  store i16 %0, ptr %.addr, align 2
510  store i32 %1, ptr %.addr1, align 4
511  call void @__kmpc_get_shared_variables(ptr %global_args)
512  call void @__omp_outlined__10(ptr %.addr1, ptr %.zero.addr) #3
513  ret void
514}
515
516define internal void @__omp_outlined__11(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
517entry:
518  %.global_tid..addr = alloca ptr, align 8
519  %.bound_tid..addr = alloca ptr, align 8
520  store ptr %.global_tid., ptr %.global_tid..addr, align 8
521  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
522  call void @p1() #7
523  ret void
524}
525
526define internal void @__omp_outlined__11_wrapper(i16 zeroext %0, i32 %1) #0 {
527entry:
528  %.addr = alloca i16, align 2
529  %.addr1 = alloca i32, align 4
530  %.zero.addr = alloca i32, align 4
531  %global_args = alloca ptr, align 8
532  store i32 0, ptr %.zero.addr, align 4
533  store i16 %0, ptr %.addr, align 2
534  store i32 %1, ptr %.addr1, align 4
535  call void @__kmpc_get_shared_variables(ptr %global_args)
536  call void @__omp_outlined__11(ptr %.addr1, ptr %.zero.addr) #3
537  ret void
538}
539
540define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 {
541entry:
542  %.zero.addr = alloca i32, align 4
543  %.threadid_temp. = alloca i32, align 4
544  store i32 0, ptr %.zero.addr, align 4
545  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr %dyn)
546  %exec_user_code = icmp eq i32 %0, -1
547  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
548
549user_code.entry:                                  ; preds = %entry
550  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
551  store i32 %1, ptr %.threadid_temp., align 4
552  call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3
553  call void @__kmpc_target_deinit()
554  ret void
555
556worker.exit:                                      ; preds = %entry
557  ret void
558}
559
560define internal void @__omp_outlined__12(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
561entry:
562  %.global_tid..addr = alloca ptr, align 8
563  %.bound_tid..addr = alloca ptr, align 8
564  %captured_vars_addrs = alloca [0 x ptr], align 8
565  %captured_vars_addrs1 = alloca [0 x ptr], align 8
566  store ptr %.global_tid., ptr %.global_tid..addr, align 8
567  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
568  call void @unknown_no_openmp() #8
569  %0 = load ptr, ptr %.global_tid..addr, align 8
570  %1 = load i32, ptr %0, align 4
571  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr %captured_vars_addrs, i64 0)
572  call void @unknown_pure() #9
573  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr %captured_vars_addrs1, i64 0)
574  ret void
575}
576
577define internal void @__omp_outlined__13(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
578entry:
579  %.global_tid..addr = alloca ptr, align 8
580  %.bound_tid..addr = alloca ptr, align 8
581  store ptr %.global_tid., ptr %.global_tid..addr, align 8
582  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
583  call void @p0() #7
584  ret void
585}
586
587define internal void @__omp_outlined__13_wrapper(i16 zeroext %0, i32 %1) #0 {
588entry:
589  %.addr = alloca i16, align 2
590  %.addr1 = alloca i32, align 4
591  %.zero.addr = alloca i32, align 4
592  %global_args = alloca ptr, align 8
593  store i32 0, ptr %.zero.addr, align 4
594  store i16 %0, ptr %.addr, align 2
595  store i32 %1, ptr %.addr1, align 4
596  call void @__kmpc_get_shared_variables(ptr %global_args)
597  call void @__omp_outlined__13(ptr %.addr1, ptr %.zero.addr) #3
598  ret void
599}
600
601declare void @unknown_pure() #5
602
603define internal void @__omp_outlined__14(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
604entry:
605  %.global_tid..addr = alloca ptr, align 8
606  %.bound_tid..addr = alloca ptr, align 8
607  store ptr %.global_tid., ptr %.global_tid..addr, align 8
608  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
609  call void @p1() #7
610  ret void
611}
612
613define internal void @__omp_outlined__14_wrapper(i16 zeroext %0, i32 %1) #0 {
614entry:
615  %.addr = alloca i16, align 2
616  %.addr1 = alloca i32, align 4
617  %.zero.addr = alloca i32, align 4
618  %global_args = alloca ptr, align 8
619  store i32 0, ptr %.zero.addr, align 4
620  store i16 %0, ptr %.addr, align 2
621  store i32 %1, ptr %.addr1, align 4
622  call void @__kmpc_get_shared_variables(ptr %global_args)
623  call void @__omp_outlined__14(ptr %.addr1, ptr %.zero.addr) #3
624  ret void
625}
626
627define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 {
628entry:
629  %.zero.addr = alloca i32, align 4
630  %.threadid_temp. = alloca i32, align 4
631  store i32 0, ptr %.zero.addr, align 4
632  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr %dyn)
633  %exec_user_code = icmp eq i32 %0, -1
634  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
635
636user_code.entry:                                  ; preds = %entry
637  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
638  store i32 %1, ptr %.threadid_temp., align 4
639  call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3
640  call void @__kmpc_target_deinit()
641  ret void
642
643worker.exit:                                      ; preds = %entry
644  ret void
645}
646
647define internal void @__omp_outlined__15(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
648entry:
649  %.global_tid..addr = alloca ptr, align 8
650  %.bound_tid..addr = alloca ptr, align 8
651  store ptr %.global_tid., ptr %.global_tid..addr, align 8
652  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
653  %call = call i32 @omp_get_thread_num() #7
654  call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #7
655  ret void
656}
657
658define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 {
659entry:
660  %a.addr = alloca i32, align 4
661  store i32 %a, ptr %a.addr, align 4
662  %0 = load i32, ptr %a.addr, align 4
663  %cmp = icmp eq i32 %0, 0
664  br i1 %cmp, label %if.then, label %if.end
665
666if.then:                                          ; preds = %entry
667  br label %return
668
669if.end:                                           ; preds = %entry
670  %1 = load i32, ptr %a.addr, align 4
671  %sub = sub nsw i32 %1, 1
672  call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #7
673  call void @simple_state_machine_interprocedural_nested_recursive_after_after() #7
674  br label %return
675
676return:                                           ; preds = %if.end, %if.then
677  ret void
678}
679
680declare i32 @omp_get_thread_num(...) #4
681
682define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 {
683entry:
684  %.zero.addr = alloca i32, align 4
685  %.threadid_temp. = alloca i32, align 4
686  store i32 0, ptr %.zero.addr, align 4
687  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr %dyn)
688  %exec_user_code = icmp eq i32 %0, -1
689  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
690
691user_code.entry:                                  ; preds = %entry
692  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
693  store i32 %1, ptr %.threadid_temp., align 4
694  call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3
695  call void @__kmpc_target_deinit()
696  ret void
697
698worker.exit:                                      ; preds = %entry
699  ret void
700}
701
702define internal void @__omp_outlined__16(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
703entry:
704  %.global_tid..addr = alloca ptr, align 8
705  %.bound_tid..addr = alloca ptr, align 8
706  store ptr %.global_tid., ptr %.global_tid..addr, align 8
707  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
708  call void @weak_callee_empty() #7
709  ret void
710}
711
712define weak hidden void @weak_callee_empty() #1 {
713entry:
714  ret void
715}
716
717declare i32 @__kmpc_single(ptr, i32) #6
718
719declare void @__kmpc_end_single(ptr, i32) #6
720
721declare void @__kmpc_barrier(ptr, i32) #6
722
723define internal void @__omp_outlined__17(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
724entry:
725  %.global_tid..addr = alloca ptr, align 8
726  %.bound_tid..addr = alloca ptr, align 8
727  store ptr %.global_tid., ptr %.global_tid..addr, align 8
728  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
729  call void @p0() #7
730  ret void
731}
732
733define internal void @__omp_outlined__17_wrapper(i16 zeroext %0, i32 %1) #0 {
734entry:
735  %.addr = alloca i16, align 2
736  %.addr1 = alloca i32, align 4
737  %.zero.addr = alloca i32, align 4
738  %global_args = alloca ptr, align 8
739  store i32 0, ptr %.zero.addr, align 4
740  store i16 %0, ptr %.addr, align 2
741  store i32 %1, ptr %.addr1, align 4
742  call void @__kmpc_get_shared_variables(ptr %global_args)
743  call void @__omp_outlined__17(ptr %.addr1, ptr %.zero.addr) #3
744  ret void
745}
746
; Parallel-region body calling p0(); same shape as __omp_outlined__17.
; Its wrapper shows up as the fall-through case of the interprocedural_l39
; state machine (see the execute5 CHECK block below).
define internal void @__omp_outlined__18(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
  %.global_tid..addr = alloca ptr, align 8
  %.bound_tid..addr = alloca ptr, align 8
  store ptr %.global_tid., ptr %.global_tid..addr, align 8
  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
  call void @p0() #7
  ret void
}
756
; Worker-side wrapper for __omp_outlined__18; identical boilerplate to
; __omp_outlined__17_wrapper (no captured variables to unpack).
define internal void @__omp_outlined__18_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
  %.addr = alloca i16, align 2
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca ptr, align 8
  store i32 0, ptr %.zero.addr, align 4
  store i16 %0, ptr %.addr, align 2
  store i32 %1, ptr %.addr1, align 4
  call void @__kmpc_get_shared_variables(ptr %global_args)
  call void @__omp_outlined__18(ptr %.addr1, ptr %.zero.addr) #3
  ret void
}
770
; Externally visible helper that launches a parallel region (body
; __omp_outlined__19) through the runtime. num_threads/proc_bind are -1
; ("unspecified"); the captured-vars array is empty (i64 0 arguments).
define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 {
entry:
  %captured_vars_addrs = alloca [0 x ptr], align 8
  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs, i64 0)
  ret void
}
778
; Parallel-region body calling p0(); launched only from the hidden helper
; above, so the nested_recursive_l92 kernel must keep a generic ("3")
; exec-mode state machine (see its kernel_environment CHECK line).
define internal void @__omp_outlined__19(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
  %.global_tid..addr = alloca ptr, align 8
  %.bound_tid..addr = alloca ptr, align 8
  store ptr %.global_tid., ptr %.global_tid..addr, align 8
  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
  call void @p0() #7
  ret void
}
788
; Worker-side wrapper for __omp_outlined__19; same no-captures boilerplate
; as the other wrappers in this file.
define internal void @__omp_outlined__19_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
  %.addr = alloca i16, align 2
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca ptr, align 8
  store i32 0, ptr %.zero.addr, align 4
  store i16 %0, ptr %.addr, align 2
  store i32 %1, ptr %.addr1, align 4
  call void @__kmpc_get_shared_variables(ptr %global_args)
  call void @__omp_outlined__19(ptr %.addr1, ptr %.zero.addr) #3
  ret void
}
802
; Attribute groups referenced by the definitions above. #0 marks the kernel
; and outlined device functions ("kernel" attribute); #2/#8 carry the
; "omp_no_openmp" llvm.assume consumed by OpenMP-opt when classifying calls.
attributes #0 = { convergent noinline norecurse nounwind "kernel" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #1 = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #2 = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #3 = { nounwind }
attributes #4 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #5 = { convergent nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #6 = { convergent nounwind }
attributes #7 = { convergent }
attributes #8 = { convergent "llvm.assume"="omp_no_openmp" }
attributes #9 = { convergent nounwind readonly willreturn }

; Offload-entry table: one !N per target region. NOTE(review): tuple fields
; appear to be {kind, file/device IDs, kernel name, source line, entry order}
; — confirm against OpenMPIRBuilder's offload-entry emission.
!omp_offload.info = !{!0, !1, !2, !3, !4, !5, !6, !7}
; Module flags: "openmp"/"openmp-device" = 50 declare OpenMP 5.0 host and
; device compilation, which gates the OpenMP-opt pass.
!llvm.module.flags = !{!16, !17, !18}

!0 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
!1 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
!2 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
!3 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
!4 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
!5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
!6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
!7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
!16 = !{i32 1, !"wchar_size", i32 4}
!17 = !{i32 7, !"openmp", i32 50}
!18 = !{i32 7, !"openmp-device", i32 50}
828;.
829; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
830; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
831; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
832; AMDGPU: @G = external global i32, align 4
833; AMDGPU: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
834; AMDGPU: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
835; AMDGPU: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
836; AMDGPU: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
837; AMDGPU: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
838; AMDGPU: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
839; AMDGPU: @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
840; AMDGPU: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
841; AMDGPU: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
842; AMDGPU: @__omp_outlined__2_wrapper.ID = private constant i8 undef
843; AMDGPU: @__omp_outlined__3_wrapper.ID = private constant i8 undef
844; AMDGPU: @__omp_outlined__5_wrapper.ID = private constant i8 undef
845; AMDGPU: @__omp_outlined__7_wrapper.ID = private constant i8 undef
846; AMDGPU: @__omp_outlined__8_wrapper.ID = private constant i8 undef
847; AMDGPU: @__omp_outlined__10_wrapper.ID = private constant i8 undef
848; AMDGPU: @__omp_outlined__11_wrapper.ID = private constant i8 undef
849; AMDGPU: @__omp_outlined__13_wrapper.ID = private constant i8 undef
850; AMDGPU: @__omp_outlined__14_wrapper.ID = private constant i8 undef
851;.
852; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
853; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
854; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
855; NVPTX: @G = external global i32, align 4
856; NVPTX: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
857; NVPTX: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
858; NVPTX: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
859; NVPTX: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
860; NVPTX: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
861; NVPTX: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
862; NVPTX: @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
863; NVPTX: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
864; NVPTX: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
865; NVPTX: @__omp_outlined__2_wrapper.ID = private constant i8 undef
866; NVPTX: @__omp_outlined__3_wrapper.ID = private constant i8 undef
867; NVPTX: @__omp_outlined__5_wrapper.ID = private constant i8 undef
868; NVPTX: @__omp_outlined__7_wrapper.ID = private constant i8 undef
869; NVPTX: @__omp_outlined__8_wrapper.ID = private constant i8 undef
870; NVPTX: @__omp_outlined__10_wrapper.ID = private constant i8 undef
871; NVPTX: @__omp_outlined__11_wrapper.ID = private constant i8 undef
872; NVPTX: @__omp_outlined__13_wrapper.ID = private constant i8 undef
873; NVPTX: @__omp_outlined__14_wrapper.ID = private constant i8 undef
874;.
875; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
876; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
877; AMDGPU-DISABLED: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
878; AMDGPU-DISABLED: @G = external global i32, align 4
879; AMDGPU-DISABLED: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
880; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
881; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
882; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
883; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
884; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
885; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
886; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
887; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
888;.
889; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
890; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
891; NVPTX-DISABLED: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
892; NVPTX-DISABLED: @G = external global i32, align 4
893; NVPTX-DISABLED: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
894; NVPTX-DISABLED: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
895; NVPTX-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
896; NVPTX-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
897; NVPTX-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
898; NVPTX-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
899; NVPTX-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
900; NVPTX-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
901; NVPTX-DISABLED: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
902;.
903; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
904; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
905; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
906; AMDGPU-NEXT:  entry:
907; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
908; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
909; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
910; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
911; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
912; AMDGPU:       user_code.entry:
913; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
914; AMDGPU-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
915; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
916; AMDGPU-NEXT:    ret void
917; AMDGPU:       worker.exit:
918; AMDGPU-NEXT:    ret void
919;
920;
921; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
922; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
923; AMDGPU-NEXT:    ret i32 0
924;
925;
926; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
927; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
928; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
929; AMDGPU-NEXT:  entry:
930; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
931; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
932; AMDGPU-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
933; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
934; AMDGPU-NEXT:    ret void
935;
936;
937; AMDGPU: Function Attrs: convergent noinline nounwind
938; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
939; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
940; AMDGPU-NEXT:  entry:
941; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
942; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
943; AMDGPU-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
944; AMDGPU-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
945; AMDGPU:       omp_if.then:
946; AMDGPU-NEXT:    store i32 0, ptr @G, align 4
947; AMDGPU-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
948; AMDGPU-NEXT:    br label [[OMP_IF_END]]
949; AMDGPU:       omp_if.end:
950; AMDGPU-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
951; AMDGPU-NEXT:    ret void
952;
953;
954; AMDGPU: Function Attrs: convergent noinline nounwind
955; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here
956; AMDGPU-SAME: () #[[ATTR1]] {
957; AMDGPU-NEXT:  entry:
958; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
959; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
960; AMDGPU-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
961; AMDGPU-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
962; AMDGPU:       omp_if.then:
963; AMDGPU-NEXT:    store i32 0, ptr @G, align 4
964; AMDGPU-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
965; AMDGPU-NEXT:    br label [[OMP_IF_END]]
966; AMDGPU:       omp_if.end:
967; AMDGPU-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
968; AMDGPU-NEXT:    ret void
969;
970;
971; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
972; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
973; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
974; AMDGPU-NEXT:  entry:
975; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
976; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
977; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
978; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
979; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
980; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
981; AMDGPU:       is_worker_check:
982; AMDGPU-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
983; AMDGPU-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
984; AMDGPU-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
985; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
986; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
987; AMDGPU:       worker_state_machine.begin:
988; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
989; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
990; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
991; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
992; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
993; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
994; AMDGPU:       worker_state_machine.finished:
995; AMDGPU-NEXT:    ret void
996; AMDGPU:       worker_state_machine.is_active.check:
997; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
998; AMDGPU:       worker_state_machine.parallel_region.check:
999; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__2_wrapper.ID
1000; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1001; AMDGPU:       worker_state_machine.parallel_region.execute:
1002; AMDGPU-NEXT:    call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]])
1003; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1004; AMDGPU:       worker_state_machine.parallel_region.check1:
1005; AMDGPU-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
1006; AMDGPU:       worker_state_machine.parallel_region.execute2:
1007; AMDGPU-NEXT:    call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
1008; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1009; AMDGPU:       worker_state_machine.parallel_region.check3:
1010; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1011; AMDGPU:       worker_state_machine.parallel_region.end:
1012; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
1013; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1014; AMDGPU:       worker_state_machine.done.barrier:
1015; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1016; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1017; AMDGPU:       thread.user_code.check:
1018; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1019; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1020; AMDGPU:       user_code.entry:
1021; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1022; AMDGPU-NEXT:    call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1023; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
1024; AMDGPU-NEXT:    ret void
1025; AMDGPU:       worker.exit:
1026; AMDGPU-NEXT:    ret void
1027;
1028;
1029; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1030; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
1031; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1032; AMDGPU-NEXT:  entry:
1033; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1034; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1035; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1036; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
1037; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1038; AMDGPU-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
1039; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1040; AMDGPU-NEXT:    ret void
1041;
1042;
1043; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1044; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
1045; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1046; AMDGPU-NEXT:  entry:
1047; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1048; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1049; AMDGPU-NEXT:    call void @p0() #[[ATTR11:[0-9]+]]
1050; AMDGPU-NEXT:    ret void
1051;
1052;
1053; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1054; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
1055; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1056; AMDGPU-NEXT:  entry:
1057; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1058; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1059; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1060; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1061; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1062; AMDGPU-NEXT:    call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1063; AMDGPU-NEXT:    ret void
1064;
1065;
1066; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1067; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
1068; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1069; AMDGPU-NEXT:  entry:
1070; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1071; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1072; AMDGPU-NEXT:    call void @p1() #[[ATTR11]]
1073; AMDGPU-NEXT:    ret void
1074;
1075;
1076; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1077; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1078; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1079; AMDGPU-NEXT:  entry:
1080; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1081; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1082; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1083; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1084; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1085; AMDGPU-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1086; AMDGPU-NEXT:    ret void
1087;
1088;
1089; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1090; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
1091; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1092; AMDGPU-NEXT:  entry:
1093; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1094; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1095; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1096; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
1097; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1098; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1099; AMDGPU:       is_worker_check:
1100; AMDGPU-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1101; AMDGPU-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1102; AMDGPU-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1103; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1104; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1105; AMDGPU:       worker_state_machine.begin:
1106; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1107; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1108; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1109; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1110; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1111; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1112; AMDGPU:       worker_state_machine.finished:
1113; AMDGPU-NEXT:    ret void
1114; AMDGPU:       worker_state_machine.is_active.check:
1115; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1116; AMDGPU:       worker_state_machine.parallel_region.check:
1117; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__17_wrapper
1118; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1119; AMDGPU:       worker_state_machine.parallel_region.execute:
1120; AMDGPU-NEXT:    call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]])
1121; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1122; AMDGPU:       worker_state_machine.parallel_region.check1:
1123; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__5_wrapper.ID
1124; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
1125; AMDGPU:       worker_state_machine.parallel_region.execute2:
1126; AMDGPU-NEXT:    call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1127; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1128; AMDGPU:       worker_state_machine.parallel_region.check3:
1129; AMDGPU-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]]
1130; AMDGPU:       worker_state_machine.parallel_region.execute5:
1131; AMDGPU-NEXT:    call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]])
1132; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1133; AMDGPU:       worker_state_machine.parallel_region.check6:
1134; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1135; AMDGPU:       worker_state_machine.parallel_region.end:
1136; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
1137; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1138; AMDGPU:       worker_state_machine.done.barrier:
1139; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1140; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1141; AMDGPU:       thread.user_code.check:
1142; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1143; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1144; AMDGPU:       user_code.entry:
1145; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1146; AMDGPU-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1147; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
1148; AMDGPU-NEXT:    ret void
1149; AMDGPU:       worker.exit:
1150; AMDGPU-NEXT:    ret void
1151;
1152;
1153; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1154; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
1155; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1156; AMDGPU-NEXT:  entry:
1157; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1158; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1159; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
1160; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
1161; AMDGPU-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
1162; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1163; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
1164; AMDGPU-NEXT:    ret void
1165;
1166;
1167; AMDGPU: Function Attrs: noinline nounwind
1168; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
1169; AMDGPU-SAME: () #[[ATTR6:[0-9]+]] {
1170; AMDGPU-NEXT:  entry:
1171; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1172; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
1173; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1174; AMDGPU-NEXT:    ret void
1175;
1176;
1177; AMDGPU: Function Attrs: convergent noinline nounwind
1178; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
1179; AMDGPU-SAME: () #[[ATTR1]] {
1180; AMDGPU-NEXT:  entry:
1181; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1182; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
1183; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1184; AMDGPU-NEXT:    ret void
1185;
1186;
1187; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1188; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
1189; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1190; AMDGPU-NEXT:  entry:
1191; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1192; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1193; AMDGPU-NEXT:    call void @p1() #[[ATTR11]]
1194; AMDGPU-NEXT:    ret void
1195;
1196;
1197; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1198; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1199; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1200; AMDGPU-NEXT:  entry:
1201; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1202; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1203; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1204; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1205; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1206; AMDGPU-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1207; AMDGPU-NEXT:    ret void
1208;
1209;
1210; AMDGPU: Function Attrs: noinline nounwind
1211; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
1212; AMDGPU-SAME: () #[[ATTR6]] {
1213; AMDGPU-NEXT:  entry:
1214; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1215; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
1216; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1217; AMDGPU-NEXT:    ret void
1218;
1219;
1220; AMDGPU: Function Attrs: convergent noinline nounwind
1221; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
1222; AMDGPU-SAME: () #[[ATTR1]] {
1223; AMDGPU-NEXT:  entry:
1224; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1225; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
1226; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1227; AMDGPU-NEXT:    ret void
1228;
1229;
1230; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1231; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
1232; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1233; AMDGPU-NEXT:  entry:
1234; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1235; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1236; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1237; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
1238; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1239; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1240; AMDGPU:       is_worker_check:
1241; AMDGPU-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1242; AMDGPU-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1243; AMDGPU-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1244; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1245; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1246; AMDGPU:       worker_state_machine.begin:
1247; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1248; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1249; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1250; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1251; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1252; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1253; AMDGPU:       worker_state_machine.finished:
1254; AMDGPU-NEXT:    ret void
1255; AMDGPU:       worker_state_machine.is_active.check:
1256; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1257; AMDGPU:       worker_state_machine.parallel_region.check:
1258; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__7_wrapper.ID
1259; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1260; AMDGPU:       worker_state_machine.parallel_region.execute:
1261; AMDGPU-NEXT:    call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
1262; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1263; AMDGPU:       worker_state_machine.parallel_region.check1:
1264; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__8_wrapper.ID
1265; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
1266; AMDGPU:       worker_state_machine.parallel_region.execute2:
1267; AMDGPU-NEXT:    call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]])
1268; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1269; AMDGPU:       worker_state_machine.parallel_region.fallback.execute:
1270; AMDGPU-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
1271; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1272; AMDGPU:       worker_state_machine.parallel_region.end:
1273; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
1274; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1275; AMDGPU:       worker_state_machine.done.barrier:
1276; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1277; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1278; AMDGPU:       thread.user_code.check:
1279; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1280; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1281; AMDGPU:       user_code.entry:
1282; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1283; AMDGPU-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1284; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
1285; AMDGPU-NEXT:    ret void
1286; AMDGPU:       worker.exit:
1287; AMDGPU-NEXT:    ret void
1288;
1289;
1290; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1291; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
1292; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1293; AMDGPU-NEXT:  entry:
1294; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1295; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1296; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1297; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1298; AMDGPU-NEXT:    [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
1299; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1300; AMDGPU-NEXT:    ret void
1301;
1302;
1303; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1304; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
1305; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1306; AMDGPU-NEXT:  entry:
1307; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1308; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1309; AMDGPU-NEXT:    call void @p0() #[[ATTR11]]
1310; AMDGPU-NEXT:    ret void
1311;
1312;
1313; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1314; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
1315; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1316; AMDGPU-NEXT:  entry:
1317; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1318; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1319; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1320; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1321; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1322; AMDGPU-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1323; AMDGPU-NEXT:    ret void
1324;
1325;
1326; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1327; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
1328; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1329; AMDGPU-NEXT:  entry:
1330; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1331; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1332; AMDGPU-NEXT:    call void @p1() #[[ATTR11]]
1333; AMDGPU-NEXT:    ret void
1334;
1335;
1336; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1337; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
1338; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1339; AMDGPU-NEXT:  entry:
1340; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1341; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1342; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1343; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1344; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1345; AMDGPU-NEXT:    call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1346; AMDGPU-NEXT:    ret void
1347;
1348;
1349; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1350; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
1351; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1352; AMDGPU-NEXT:  entry:
1353; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1354; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1355; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1356; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
1357; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1358; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1359; AMDGPU:       is_worker_check:
1360; AMDGPU-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1361; AMDGPU-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1362; AMDGPU-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1363; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1364; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1365; AMDGPU:       worker_state_machine.begin:
1366; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1367; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1368; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1369; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1370; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1371; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1372; AMDGPU:       worker_state_machine.finished:
1373; AMDGPU-NEXT:    ret void
1374; AMDGPU:       worker_state_machine.is_active.check:
1375; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1376; AMDGPU:       worker_state_machine.parallel_region.check:
1377; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__10_wrapper.ID
1378; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1379; AMDGPU:       worker_state_machine.parallel_region.execute:
1380; AMDGPU-NEXT:    call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]])
1381; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1382; AMDGPU:       worker_state_machine.parallel_region.check1:
1383; AMDGPU-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
1384; AMDGPU:       worker_state_machine.parallel_region.execute2:
1385; AMDGPU-NEXT:    call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]])
1386; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1387; AMDGPU:       worker_state_machine.parallel_region.check3:
1388; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1389; AMDGPU:       worker_state_machine.parallel_region.end:
1390; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
1391; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1392; AMDGPU:       worker_state_machine.done.barrier:
1393; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1394; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1395; AMDGPU:       thread.user_code.check:
1396; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1397; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1398; AMDGPU:       user_code.entry:
1399; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1400; AMDGPU-NEXT:    call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1401; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
1402; AMDGPU-NEXT:    ret void
1403; AMDGPU:       worker.exit:
1404; AMDGPU-NEXT:    ret void
1405;
1406;
1407; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1408; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
1409; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1410; AMDGPU-NEXT:  entry:
1411; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1412; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1413; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1414; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1415; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
1416; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1417; AMDGPU-NEXT:    ret void
1418;
1419;
1420; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1421; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10
1422; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1423; AMDGPU-NEXT:  entry:
1424; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1425; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1426; AMDGPU-NEXT:    call void @p0() #[[ATTR11]]
1427; AMDGPU-NEXT:    ret void
1428;
1429;
1430; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1431; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
1432; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1433; AMDGPU-NEXT:  entry:
1434; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1435; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1436; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1437; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1438; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1439; AMDGPU-NEXT:    call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1440; AMDGPU-NEXT:    ret void
1441;
1442;
1443; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1444; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11
1445; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1446; AMDGPU-NEXT:  entry:
1447; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1448; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1449; AMDGPU-NEXT:    call void @p1() #[[ATTR11]]
1450; AMDGPU-NEXT:    ret void
1451;
1452;
1453; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1454; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
1455; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1456; AMDGPU-NEXT:  entry:
1457; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1458; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1459; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1460; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1461; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1462; AMDGPU-NEXT:    call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1463; AMDGPU-NEXT:    ret void
1464;
1465;
1466; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1467; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
1468; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1469; AMDGPU-NEXT:  entry:
1470; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1471; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1472; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1473; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
1474; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1475; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1476; AMDGPU:       is_worker_check:
1477; AMDGPU-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1478; AMDGPU-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1479; AMDGPU-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1480; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1481; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1482; AMDGPU:       worker_state_machine.begin:
1483; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1484; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1485; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1486; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1487; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1488; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1489; AMDGPU:       worker_state_machine.finished:
1490; AMDGPU-NEXT:    ret void
1491; AMDGPU:       worker_state_machine.is_active.check:
1492; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1493; AMDGPU:       worker_state_machine.parallel_region.check:
1494; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__13_wrapper.ID
1495; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1496; AMDGPU:       worker_state_machine.parallel_region.execute:
1497; AMDGPU-NEXT:    call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]])
1498; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1499; AMDGPU:       worker_state_machine.parallel_region.check1:
1500; AMDGPU-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
1501; AMDGPU:       worker_state_machine.parallel_region.execute2:
1502; AMDGPU-NEXT:    call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]])
1503; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1504; AMDGPU:       worker_state_machine.parallel_region.check3:
1505; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1506; AMDGPU:       worker_state_machine.parallel_region.end:
1507; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
1508; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1509; AMDGPU:       worker_state_machine.done.barrier:
1510; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1511; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1512; AMDGPU:       thread.user_code.check:
1513; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1514; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1515; AMDGPU:       user_code.entry:
1516; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1517; AMDGPU-NEXT:    call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1518; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
1519; AMDGPU-NEXT:    ret void
1520; AMDGPU:       worker.exit:
1521; AMDGPU-NEXT:    ret void
1522;
1523;
1524; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1525; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12
1526; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1527; AMDGPU-NEXT:  entry:
1528; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1529; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1530; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1531; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
1532; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1533; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1534; AMDGPU-NEXT:    ret void
1535;
1536;
1537; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1538; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13
1539; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1540; AMDGPU-NEXT:  entry:
1541; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1542; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1543; AMDGPU-NEXT:    call void @p0() #[[ATTR11]]
1544; AMDGPU-NEXT:    ret void
1545;
1546;
1547; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1548; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
1549; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1550; AMDGPU-NEXT:  entry:
1551; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1552; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1553; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1554; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1555; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1556; AMDGPU-NEXT:    call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1557; AMDGPU-NEXT:    ret void
1558;
1559;
1560; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1561; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14
1562; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1563; AMDGPU-NEXT:  entry:
1564; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1565; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1566; AMDGPU-NEXT:    call void @p1() #[[ATTR11]]
1567; AMDGPU-NEXT:    ret void
1568;
1569;
1570; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1571; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
1572; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1573; AMDGPU-NEXT:  entry:
1574; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1575; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1576; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1577; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1578; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1579; AMDGPU-NEXT:    call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1580; AMDGPU-NEXT:    ret void
1581;
1582;
1583; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1584; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
1585; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1586; AMDGPU-NEXT:  entry:
1587; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1588; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1589; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
1590; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1591; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1592; AMDGPU:       user_code.entry:
1593; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1594; AMDGPU-NEXT:    call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1595; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
1596; AMDGPU-NEXT:    ret void
1597; AMDGPU:       worker.exit:
1598; AMDGPU-NEXT:    ret void
1599;
1600;
1601; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1602; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15
1603; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1604; AMDGPU-NEXT:  entry:
1605; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1606; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1607; AMDGPU-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
1608; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
1609; AMDGPU-NEXT:    ret void
1610;
1611;
1612; AMDGPU: Function Attrs: noinline nounwind
1613; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
1614; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
1615; AMDGPU-NEXT:  entry:
1616; AMDGPU-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
1617; AMDGPU-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
1618; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
1619; AMDGPU-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
1620; AMDGPU-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
1621; AMDGPU:       if.then:
1622; AMDGPU-NEXT:    br label [[RETURN:%.*]]
1623; AMDGPU:       if.end:
1624; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
1625; AMDGPU-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
1626; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
1627; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
1628; AMDGPU-NEXT:    br label [[RETURN]]
1629; AMDGPU:       return:
1630; AMDGPU-NEXT:    ret void
1631;
1632;
1633; AMDGPU: Function Attrs: convergent noinline nounwind
1634; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
1635; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
1636; AMDGPU-NEXT:  entry:
1637; AMDGPU-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
1638; AMDGPU-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
1639; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
1640; AMDGPU-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
1641; AMDGPU-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
1642; AMDGPU:       if.then:
1643; AMDGPU-NEXT:    br label [[RETURN:%.*]]
1644; AMDGPU:       if.end:
1645; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
1646; AMDGPU-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
1647; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
1648; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
1649; AMDGPU-NEXT:    br label [[RETURN]]
1650; AMDGPU:       return:
1651; AMDGPU-NEXT:    ret void
1652;
1653;
1654; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1655; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
1656; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1657; AMDGPU-NEXT:  entry:
1658; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1659; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1660; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1661; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
1662; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1663; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1664; AMDGPU:       is_worker_check:
1665; AMDGPU-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1666; AMDGPU-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1667; AMDGPU-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1668; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1669; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1670; AMDGPU:       worker_state_machine.begin:
1671; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1672; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1673; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1674; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1675; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1676; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1677; AMDGPU:       worker_state_machine.finished:
1678; AMDGPU-NEXT:    ret void
1679; AMDGPU:       worker_state_machine.is_active.check:
1680; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1681; AMDGPU:       worker_state_machine.parallel_region.fallback.execute:
1682; AMDGPU-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
1683; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1684; AMDGPU:       worker_state_machine.parallel_region.end:
1685; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
1686; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1687; AMDGPU:       worker_state_machine.done.barrier:
1688; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1689; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1690; AMDGPU:       thread.user_code.check:
1691; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1692; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1693; AMDGPU:       user_code.entry:
1694; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1695; AMDGPU-NEXT:    call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1696; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
1697; AMDGPU-NEXT:    ret void
1698; AMDGPU:       worker.exit:
1699; AMDGPU-NEXT:    ret void
1700;
1701;
1702; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1703; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16
1704; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1705; AMDGPU-NEXT:  entry:
1706; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1707; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1708; AMDGPU-NEXT:    call void @weak_callee_empty() #[[ATTR9]]
1709; AMDGPU-NEXT:    ret void
1710;
1711;
1712; AMDGPU: Function Attrs: convergent noinline nounwind
1713; AMDGPU-LABEL: define {{[^@]+}}@weak_callee_empty
1714; AMDGPU-SAME: () #[[ATTR1]] {
1715; AMDGPU-NEXT:  entry:
1716; AMDGPU-NEXT:    ret void
1717;
1718;
1719; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1720; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17
1721; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1722; AMDGPU-NEXT:  entry:
1723; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1724; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1725; AMDGPU-NEXT:    call void @p0() #[[ATTR11]]
1726; AMDGPU-NEXT:    ret void
1727;
1728;
1729; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1730; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
1731; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1732; AMDGPU-NEXT:  entry:
1733; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1734; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1735; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1736; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1737; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1738; AMDGPU-NEXT:    call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1739; AMDGPU-NEXT:    ret void
1740;
1741;
1742; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1743; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18
1744; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1745; AMDGPU-NEXT:  entry:
1746; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1747; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1748; AMDGPU-NEXT:    call void @p0() #[[ATTR11]]
1749; AMDGPU-NEXT:    ret void
1750;
1751;
1752; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1753; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
1754; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1755; AMDGPU-NEXT:  entry:
1756; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1757; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1758; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1759; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1760; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1761; AMDGPU-NEXT:    call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1762; AMDGPU-NEXT:    ret void
1763;
1764;
1765; AMDGPU: Function Attrs: noinline nounwind
1766; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
1767; AMDGPU-SAME: () #[[ATTR6]] {
1768; AMDGPU-NEXT:  entry:
1769; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1770; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
1771; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1772; AMDGPU-NEXT:    ret void
1773;
1774;
1775; AMDGPU: Function Attrs: convergent noinline nounwind
1776; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
1777; AMDGPU-SAME: () #[[ATTR1]] {
1778; AMDGPU-NEXT:  entry:
1779; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1780; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
1781; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1782; AMDGPU-NEXT:    ret void
1783;
1784;
1785; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1786; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19
1787; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1788; AMDGPU-NEXT:  entry:
1789; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1790; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1791; AMDGPU-NEXT:    call void @p0() #[[ATTR11]]
1792; AMDGPU-NEXT:    ret void
1793;
1794;
1795; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
1796; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
1797; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1798; AMDGPU-NEXT:  entry:
1799; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1800; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1801; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1802; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1803; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1804; AMDGPU-NEXT:    call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1805; AMDGPU-NEXT:    ret void
1806;
1807;
1808; NVPTX: Function Attrs: convergent noinline norecurse nounwind
1809; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
1810; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
1811; NVPTX-NEXT:  entry:
1812; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1813; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1814; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
1815; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1816; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1817; NVPTX:       user_code.entry:
1818; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
1819; NVPTX-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1820; NVPTX-NEXT:    call void @__kmpc_target_deinit()
1821; NVPTX-NEXT:    ret void
1822; NVPTX:       worker.exit:
1823; NVPTX-NEXT:    ret void
1824;
1825;
1826; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
1827; NVPTX-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
1828; NVPTX-NEXT:    ret i32 0
1829;
1830;
1831; NVPTX: Function Attrs: convergent noinline norecurse nounwind
1832; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
1833; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1834; NVPTX-NEXT:  entry:
1835; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1836; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1837; NVPTX-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
1838; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
1839; NVPTX-NEXT:    ret void
1840;
1841;
1842; NVPTX: Function Attrs: convergent noinline nounwind
1843; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
1844; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
1845; NVPTX-NEXT:  entry:
1846; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
1847; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
1848; NVPTX-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
1849; NVPTX-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
1850; NVPTX:       omp_if.then:
1851; NVPTX-NEXT:    store i32 0, ptr @G, align 4
1852; NVPTX-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
1853; NVPTX-NEXT:    br label [[OMP_IF_END]]
1854; NVPTX:       omp_if.end:
1855; NVPTX-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
1856; NVPTX-NEXT:    ret void
1857;
1858;
1859; NVPTX: Function Attrs: convergent noinline nounwind
1860; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here
1861; NVPTX-SAME: () #[[ATTR1]] {
1862; NVPTX-NEXT:  entry:
1863; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
1864; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
1865; NVPTX-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
1866; NVPTX-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
1867; NVPTX:       omp_if.then:
1868; NVPTX-NEXT:    store i32 0, ptr @G, align 4
1869; NVPTX-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
1870; NVPTX-NEXT:    br label [[OMP_IF_END]]
1871; NVPTX:       omp_if.end:
1872; NVPTX-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
1873; NVPTX-NEXT:    ret void
1874;
1875;
1876; NVPTX: Function Attrs: convergent noinline norecurse nounwind
1877; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
1878; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1879; NVPTX-NEXT:  entry:
1880; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1881; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1882; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1883; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
1884; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1885; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1886; NVPTX:       is_worker_check:
1887; NVPTX-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1888; NVPTX-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1889; NVPTX-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1890; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1891; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1892; NVPTX:       worker_state_machine.begin:
1893; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1894; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1895; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1896; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1897; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1898; NVPTX:       worker_state_machine.finished:
1899; NVPTX-NEXT:    ret void
1900; NVPTX:       worker_state_machine.is_active.check:
1901; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1902; NVPTX:       worker_state_machine.parallel_region.check:
1903; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__2_wrapper.ID
1904; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1905; NVPTX:       worker_state_machine.parallel_region.execute:
1906; NVPTX-NEXT:    call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]])
1907; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1908; NVPTX:       worker_state_machine.parallel_region.check1:
1909; NVPTX-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
1910; NVPTX:       worker_state_machine.parallel_region.execute2:
1911; NVPTX-NEXT:    call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
1912; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1913; NVPTX:       worker_state_machine.parallel_region.check3:
1914; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1915; NVPTX:       worker_state_machine.parallel_region.end:
1916; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
1917; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1918; NVPTX:       worker_state_machine.done.barrier:
1919; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1920; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1921; NVPTX:       thread.user_code.check:
1922; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1923; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1924; NVPTX:       user_code.entry:
1925; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
1926; NVPTX-NEXT:    call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1927; NVPTX-NEXT:    call void @__kmpc_target_deinit()
1928; NVPTX-NEXT:    ret void
1929; NVPTX:       worker.exit:
1930; NVPTX-NEXT:    ret void
1931;
1932;
1933; NVPTX: Function Attrs: convergent noinline norecurse nounwind
1934; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
1935; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1936; NVPTX-NEXT:  entry:
1937; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1938; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1939; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
1940; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
1941; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1942; NVPTX-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
1943; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
1944; NVPTX-NEXT:    ret void
1945;
1946;
1947; NVPTX: Function Attrs: convergent noinline norecurse nounwind
1948; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
1949; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1950; NVPTX-NEXT:  entry:
1951; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1952; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1953; NVPTX-NEXT:    call void @p0() #[[ATTR11:[0-9]+]]
1954; NVPTX-NEXT:    ret void
1955;
1956;
1957; NVPTX: Function Attrs: convergent noinline norecurse nounwind
1958; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
1959; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1960; NVPTX-NEXT:  entry:
1961; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1962; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1963; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1964; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1965; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1966; NVPTX-NEXT:    call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1967; NVPTX-NEXT:    ret void
1968;
1969;
1970; NVPTX: Function Attrs: convergent noinline norecurse nounwind
1971; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
1972; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
1973; NVPTX-NEXT:  entry:
1974; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
1975; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
1976; NVPTX-NEXT:    call void @p1() #[[ATTR11]]
1977; NVPTX-NEXT:    ret void
1978;
1979;
1980; NVPTX: Function Attrs: convergent noinline norecurse nounwind
1981; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1982; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
1983; NVPTX-NEXT:  entry:
1984; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
1985; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1986; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1987; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1988; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1989; NVPTX-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
1990; NVPTX-NEXT:    ret void
1991;
1992;
1993; NVPTX: Function Attrs: convergent noinline norecurse nounwind
1994; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
1995; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
1996; NVPTX-NEXT:  entry:
1997; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1998; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1999; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2000; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
2001; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2002; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2003; NVPTX:       is_worker_check:
2004; NVPTX-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2005; NVPTX-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2006; NVPTX-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2007; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2008; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2009; NVPTX:       worker_state_machine.begin:
2010; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2011; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2012; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2013; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2014; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2015; NVPTX:       worker_state_machine.finished:
2016; NVPTX-NEXT:    ret void
2017; NVPTX:       worker_state_machine.is_active.check:
2018; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2019; NVPTX:       worker_state_machine.parallel_region.check:
2020; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__17_wrapper
2021; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
2022; NVPTX:       worker_state_machine.parallel_region.execute:
2023; NVPTX-NEXT:    call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]])
2024; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2025; NVPTX:       worker_state_machine.parallel_region.check1:
2026; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__5_wrapper.ID
2027; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
2028; NVPTX:       worker_state_machine.parallel_region.execute2:
2029; NVPTX-NEXT:    call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
2030; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2031; NVPTX:       worker_state_machine.parallel_region.check3:
2032; NVPTX-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]]
2033; NVPTX:       worker_state_machine.parallel_region.execute5:
2034; NVPTX-NEXT:    call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]])
2035; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2036; NVPTX:       worker_state_machine.parallel_region.check6:
2037; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2038; NVPTX:       worker_state_machine.parallel_region.end:
2039; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
2040; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2041; NVPTX:       worker_state_machine.done.barrier:
2042; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2043; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2044; NVPTX:       thread.user_code.check:
2045; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2046; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2047; NVPTX:       user_code.entry:
2048; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2049; NVPTX-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2050; NVPTX-NEXT:    call void @__kmpc_target_deinit()
2051; NVPTX-NEXT:    ret void
2052; NVPTX:       worker.exit:
2053; NVPTX-NEXT:    ret void
2054;
2055;
2056; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2057; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
2058; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2059; NVPTX-NEXT:  entry:
2060; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2061; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2062; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
2063; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
2064; NVPTX-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
2065; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2066; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
2067; NVPTX-NEXT:    ret void
2068;
2069;
2070; NVPTX: Function Attrs: noinline nounwind
2071; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
2072; NVPTX-SAME: () #[[ATTR6:[0-9]+]] {
2073; NVPTX-NEXT:  entry:
2074; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2075; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
2076; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2077; NVPTX-NEXT:    ret void
2078;
2079;
2080; NVPTX: Function Attrs: convergent noinline nounwind
2081; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
2082; NVPTX-SAME: () #[[ATTR1]] {
2083; NVPTX-NEXT:  entry:
2084; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2085; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
2086; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2087; NVPTX-NEXT:    ret void
2088;
2089;
2090; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2091; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
2092; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2093; NVPTX-NEXT:  entry:
2094; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2095; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2096; NVPTX-NEXT:    call void @p1() #[[ATTR11]]
2097; NVPTX-NEXT:    ret void
2098;
2099;
2100; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2101; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
2102; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2103; NVPTX-NEXT:  entry:
2104; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2105; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2106; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2107; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2108; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2109; NVPTX-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2110; NVPTX-NEXT:    ret void
2111;
2112;
2113; NVPTX: Function Attrs: noinline nounwind
2114; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
2115; NVPTX-SAME: () #[[ATTR6]] {
2116; NVPTX-NEXT:  entry:
2117; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2118; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
2119; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2120; NVPTX-NEXT:    ret void
2121;
2122;
2123; NVPTX: Function Attrs: convergent noinline nounwind
2124; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
2125; NVPTX-SAME: () #[[ATTR1]] {
2126; NVPTX-NEXT:  entry:
2127; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2128; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
2129; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2130; NVPTX-NEXT:    ret void
2131;
2132;
2133; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2134; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
2135; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2136; NVPTX-NEXT:  entry:
2137; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2138; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2139; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2140; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
2141; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2142; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2143; NVPTX:       is_worker_check:
2144; NVPTX-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2145; NVPTX-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2146; NVPTX-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2147; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2148; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2149; NVPTX:       worker_state_machine.begin:
2150; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2151; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2152; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2153; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2154; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2155; NVPTX:       worker_state_machine.finished:
2156; NVPTX-NEXT:    ret void
2157; NVPTX:       worker_state_machine.is_active.check:
2158; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2159; NVPTX:       worker_state_machine.parallel_region.check:
2160; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__7_wrapper.ID
2161; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
2162; NVPTX:       worker_state_machine.parallel_region.execute:
2163; NVPTX-NEXT:    call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
2164; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2165; NVPTX:       worker_state_machine.parallel_region.check1:
2166; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__8_wrapper.ID
2167; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2168; NVPTX:       worker_state_machine.parallel_region.execute2:
2169; NVPTX-NEXT:    call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]])
2170; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2171; NVPTX:       worker_state_machine.parallel_region.fallback.execute:
2172; NVPTX-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2173; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2174; NVPTX:       worker_state_machine.parallel_region.end:
2175; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
2176; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2177; NVPTX:       worker_state_machine.done.barrier:
2178; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2179; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2180; NVPTX:       thread.user_code.check:
2181; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2182; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2183; NVPTX:       user_code.entry:
2184; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2185; NVPTX-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2186; NVPTX-NEXT:    call void @__kmpc_target_deinit()
2187; NVPTX-NEXT:    ret void
2188; NVPTX:       worker.exit:
2189; NVPTX-NEXT:    ret void
2190;
2191;
2192; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2193; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
2194; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2195; NVPTX-NEXT:  entry:
2196; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2197; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2198; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
2199; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2200; NVPTX-NEXT:    [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
2201; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
2202; NVPTX-NEXT:    ret void
2203;
2204;
2205; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2206; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
2207; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2208; NVPTX-NEXT:  entry:
2209; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2210; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2211; NVPTX-NEXT:    call void @p0() #[[ATTR11]]
2212; NVPTX-NEXT:    ret void
2213;
2214;
2215; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2216; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2217; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2218; NVPTX-NEXT:  entry:
2219; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2220; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2221; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2222; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2223; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2224; NVPTX-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2225; NVPTX-NEXT:    ret void
2226;
2227;
2228; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2229; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
2230; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2231; NVPTX-NEXT:  entry:
2232; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2233; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2234; NVPTX-NEXT:    call void @p1() #[[ATTR11]]
2235; NVPTX-NEXT:    ret void
2236;
2237;
2238; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2239; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
2240; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2241; NVPTX-NEXT:  entry:
2242; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2243; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2244; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2245; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2246; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2247; NVPTX-NEXT:    call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2248; NVPTX-NEXT:    ret void
2249;
2250;
2251; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2252; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
2253; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2254; NVPTX-NEXT:  entry:
2255; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2256; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2257; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2258; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
2259; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2260; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2261; NVPTX:       is_worker_check:
2262; NVPTX-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2263; NVPTX-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2264; NVPTX-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2265; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2266; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2267; NVPTX:       worker_state_machine.begin:
2268; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2269; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2270; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2271; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2272; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2273; NVPTX:       worker_state_machine.finished:
2274; NVPTX-NEXT:    ret void
2275; NVPTX:       worker_state_machine.is_active.check:
2276; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2277; NVPTX:       worker_state_machine.parallel_region.check:
2278; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__10_wrapper.ID
2279; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
2280; NVPTX:       worker_state_machine.parallel_region.execute:
2281; NVPTX-NEXT:    call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]])
2282; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2283; NVPTX:       worker_state_machine.parallel_region.check1:
2284; NVPTX-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
2285; NVPTX:       worker_state_machine.parallel_region.execute2:
2286; NVPTX-NEXT:    call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]])
2287; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2288; NVPTX:       worker_state_machine.parallel_region.check3:
2289; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2290; NVPTX:       worker_state_machine.parallel_region.end:
2291; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
2292; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2293; NVPTX:       worker_state_machine.done.barrier:
2294; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2295; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2296; NVPTX:       thread.user_code.check:
2297; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2298; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2299; NVPTX:       user_code.entry:
2300; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2301; NVPTX-NEXT:    call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2302; NVPTX-NEXT:    call void @__kmpc_target_deinit()
2303; NVPTX-NEXT:    ret void
2304; NVPTX:       worker.exit:
2305; NVPTX-NEXT:    ret void
2306;
2307;
2308; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2309; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
2310; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2311; NVPTX-NEXT:  entry:
2312; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2313; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2314; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
2315; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2316; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
2317; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
2318; NVPTX-NEXT:    ret void
2319;
2320;
2321; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2322; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10
2323; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2324; NVPTX-NEXT:  entry:
2325; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2326; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2327; NVPTX-NEXT:    call void @p0() #[[ATTR11]]
2328; NVPTX-NEXT:    ret void
2329;
2330;
2331; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2332; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
2333; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2334; NVPTX-NEXT:  entry:
2335; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2336; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2337; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2338; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2339; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2340; NVPTX-NEXT:    call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2341; NVPTX-NEXT:    ret void
2342;
2343;
2344; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2345; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11
2346; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2347; NVPTX-NEXT:  entry:
2348; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2349; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2350; NVPTX-NEXT:    call void @p1() #[[ATTR11]]
2351; NVPTX-NEXT:    ret void
2352;
2353;
2354; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2355; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
2356; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2357; NVPTX-NEXT:  entry:
2358; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2359; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2360; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2361; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2362; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2363; NVPTX-NEXT:    call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2364; NVPTX-NEXT:    ret void
2365;
2366;
2367; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2368; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
2369; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2370; NVPTX-NEXT:  entry:
2371; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2372; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2373; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2374; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
2375; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2376; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2377; NVPTX:       is_worker_check:
2378; NVPTX-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2379; NVPTX-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2380; NVPTX-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2381; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2382; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2383; NVPTX:       worker_state_machine.begin:
2384; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2385; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2386; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2387; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2388; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2389; NVPTX:       worker_state_machine.finished:
2390; NVPTX-NEXT:    ret void
2391; NVPTX:       worker_state_machine.is_active.check:
2392; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2393; NVPTX:       worker_state_machine.parallel_region.check:
2394; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__13_wrapper.ID
2395; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
2396; NVPTX:       worker_state_machine.parallel_region.execute:
2397; NVPTX-NEXT:    call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]])
2398; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2399; NVPTX:       worker_state_machine.parallel_region.check1:
2400; NVPTX-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
2401; NVPTX:       worker_state_machine.parallel_region.execute2:
2402; NVPTX-NEXT:    call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]])
2403; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2404; NVPTX:       worker_state_machine.parallel_region.check3:
2405; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2406; NVPTX:       worker_state_machine.parallel_region.end:
2407; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
2408; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2409; NVPTX:       worker_state_machine.done.barrier:
2410; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2411; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2412; NVPTX:       thread.user_code.check:
2413; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2414; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2415; NVPTX:       user_code.entry:
2416; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2417; NVPTX-NEXT:    call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2418; NVPTX-NEXT:    call void @__kmpc_target_deinit()
2419; NVPTX-NEXT:    ret void
2420; NVPTX:       worker.exit:
2421; NVPTX-NEXT:    ret void
2422;
2423;
2424; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2425; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12
2426; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2427; NVPTX-NEXT:  entry:
2428; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2429; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2430; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
2431; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
2432; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2433; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
2434; NVPTX-NEXT:    ret void
2435;
2436;
2437; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2438; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13
2439; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2440; NVPTX-NEXT:  entry:
2441; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2442; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2443; NVPTX-NEXT:    call void @p0() #[[ATTR11]]
2444; NVPTX-NEXT:    ret void
2445;
2446;
2447; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2448; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
2449; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2450; NVPTX-NEXT:  entry:
2451; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2452; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2453; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2454; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2455; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2456; NVPTX-NEXT:    call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2457; NVPTX-NEXT:    ret void
2458;
2459;
2460; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2461; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14
2462; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2463; NVPTX-NEXT:  entry:
2464; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2465; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2466; NVPTX-NEXT:    call void @p1() #[[ATTR11]]
2467; NVPTX-NEXT:    ret void
2468;
2469;
2470; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2471; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
2472; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2473; NVPTX-NEXT:  entry:
2474; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2475; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2476; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2477; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2478; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2479; NVPTX-NEXT:    call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2480; NVPTX-NEXT:    ret void
2481;
2482;
2483; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2484; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
2485; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2486; NVPTX-NEXT:  entry:
2487; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2488; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2489; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
2490; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2491; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2492; NVPTX:       user_code.entry:
2493; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2494; NVPTX-NEXT:    call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2495; NVPTX-NEXT:    call void @__kmpc_target_deinit()
2496; NVPTX-NEXT:    ret void
2497; NVPTX:       worker.exit:
2498; NVPTX-NEXT:    ret void
2499;
2500;
2501; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2502; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15
2503; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2504; NVPTX-NEXT:  entry:
2505; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2506; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2507; NVPTX-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
2508; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
2509; NVPTX-NEXT:    ret void
2510;
2511;
2512; NVPTX: Function Attrs: noinline nounwind
2513; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
2514; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
2515; NVPTX-NEXT:  entry:
2516; NVPTX-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
2517; NVPTX-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
2518; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
2519; NVPTX-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
2520; NVPTX-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
2521; NVPTX:       if.then:
2522; NVPTX-NEXT:    br label [[RETURN:%.*]]
2523; NVPTX:       if.end:
2524; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
2525; NVPTX-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
2526; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
2527; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
2528; NVPTX-NEXT:    br label [[RETURN]]
2529; NVPTX:       return:
2530; NVPTX-NEXT:    ret void
2531;
2532;
2533; NVPTX: Function Attrs: convergent noinline nounwind
2534; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
2535; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
2536; NVPTX-NEXT:  entry:
2537; NVPTX-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
2538; NVPTX-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
2539; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
2540; NVPTX-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
2541; NVPTX-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
2542; NVPTX:       if.then:
2543; NVPTX-NEXT:    br label [[RETURN:%.*]]
2544; NVPTX:       if.end:
2545; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
2546; NVPTX-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
2547; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
2548; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
2549; NVPTX-NEXT:    br label [[RETURN]]
2550; NVPTX:       return:
2551; NVPTX-NEXT:    ret void
2552;
2553;
2554; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2555; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
2556; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2557; NVPTX-NEXT:  entry:
2558; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2559; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2560; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2561; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
2562; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2563; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2564; NVPTX:       is_worker_check:
2565; NVPTX-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2566; NVPTX-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2567; NVPTX-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2568; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2569; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2570; NVPTX:       worker_state_machine.begin:
2571; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2572; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2573; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2574; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2575; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2576; NVPTX:       worker_state_machine.finished:
2577; NVPTX-NEXT:    ret void
2578; NVPTX:       worker_state_machine.is_active.check:
2579; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2580; NVPTX:       worker_state_machine.parallel_region.fallback.execute:
2581; NVPTX-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2582; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2583; NVPTX:       worker_state_machine.parallel_region.end:
2584; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
2585; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2586; NVPTX:       worker_state_machine.done.barrier:
2587; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2588; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2589; NVPTX:       thread.user_code.check:
2590; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2591; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2592; NVPTX:       user_code.entry:
2593; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2594; NVPTX-NEXT:    call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2595; NVPTX-NEXT:    call void @__kmpc_target_deinit()
2596; NVPTX-NEXT:    ret void
2597; NVPTX:       worker.exit:
2598; NVPTX-NEXT:    ret void
2599;
2600;
2601; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2602; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16
2603; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2604; NVPTX-NEXT:  entry:
2605; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2606; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2607; NVPTX-NEXT:    call void @weak_callee_empty() #[[ATTR9]]
2608; NVPTX-NEXT:    ret void
2609;
2610;
2611; NVPTX: Function Attrs: convergent noinline nounwind
2612; NVPTX-LABEL: define {{[^@]+}}@weak_callee_empty
2613; NVPTX-SAME: () #[[ATTR1]] {
2614; NVPTX-NEXT:  entry:
2615; NVPTX-NEXT:    ret void
2616;
2617;
2618; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2619; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17
2620; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2621; NVPTX-NEXT:  entry:
2622; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2623; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2624; NVPTX-NEXT:    call void @p0() #[[ATTR11]]
2625; NVPTX-NEXT:    ret void
2626;
2627;
2628; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2629; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
2630; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2631; NVPTX-NEXT:  entry:
2632; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2633; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2634; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2635; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2636; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2637; NVPTX-NEXT:    call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2638; NVPTX-NEXT:    ret void
2639;
2640;
2641; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2642; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18
2643; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2644; NVPTX-NEXT:  entry:
2645; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2646; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2647; NVPTX-NEXT:    call void @p0() #[[ATTR11]]
2648; NVPTX-NEXT:    ret void
2649;
2650;
2651; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2652; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
2653; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2654; NVPTX-NEXT:  entry:
2655; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2656; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2657; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2658; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2659; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2660; NVPTX-NEXT:    call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2661; NVPTX-NEXT:    ret void
2662;
2663;
2664; NVPTX: Function Attrs: noinline nounwind
2665; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
2666; NVPTX-SAME: () #[[ATTR6]] {
2667; NVPTX-NEXT:  entry:
2668; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2669; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
2670; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2671; NVPTX-NEXT:    ret void
2672;
2673;
2674; NVPTX: Function Attrs: convergent noinline nounwind
2675; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
2676; NVPTX-SAME: () #[[ATTR1]] {
2677; NVPTX-NEXT:  entry:
2678; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2679; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
2680; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2681; NVPTX-NEXT:    ret void
2682;
2683;
2684; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2685; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19
2686; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2687; NVPTX-NEXT:  entry:
2688; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2689; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2690; NVPTX-NEXT:    call void @p0() #[[ATTR11]]
2691; NVPTX-NEXT:    ret void
2692;
2693;
2694; NVPTX: Function Attrs: convergent noinline norecurse nounwind
2695; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
2696; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2697; NVPTX-NEXT:  entry:
2698; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2699; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2700; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2701; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2702; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2703; NVPTX-NEXT:    call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2704; NVPTX-NEXT:    ret void
2705;
2706;
2707; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2708; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
2709; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
2710; AMDGPU-DISABLED-NEXT:  entry:
2711; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2712; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2713; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
2714; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2715; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2716; AMDGPU-DISABLED:       user_code.entry:
2717; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
2718; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2719; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit()
2720; AMDGPU-DISABLED-NEXT:    ret void
2721; AMDGPU-DISABLED:       worker.exit:
2722; AMDGPU-DISABLED-NEXT:    ret void
2723;
2724;
2725; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
2726; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
2727; AMDGPU-DISABLED-NEXT:    ret i32 0
2728;
2729;
2730; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2731; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
2732; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2733; AMDGPU-DISABLED-NEXT:  entry:
2734; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2735; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2736; AMDGPU-DISABLED-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
2737; AMDGPU-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
2738; AMDGPU-DISABLED-NEXT:    ret void
2739;
2740;
2741; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
2742; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
2743; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
2744; AMDGPU-DISABLED-NEXT:  entry:
2745; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
2746; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
2747; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
2748; AMDGPU-DISABLED-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
2749; AMDGPU-DISABLED:       omp_if.then:
2750; AMDGPU-DISABLED-NEXT:    store i32 0, ptr @G, align 4
2751; AMDGPU-DISABLED-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
2752; AMDGPU-DISABLED-NEXT:    br label [[OMP_IF_END]]
2753; AMDGPU-DISABLED:       omp_if.end:
2754; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
2755; AMDGPU-DISABLED-NEXT:    ret void
2756;
2757;
2758; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
2759; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here
2760; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
2761; AMDGPU-DISABLED-NEXT:  entry:
2762; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
2763; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
2764; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
2765; AMDGPU-DISABLED-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
2766; AMDGPU-DISABLED:       omp_if.then:
2767; AMDGPU-DISABLED-NEXT:    store i32 0, ptr @G, align 4
2768; AMDGPU-DISABLED-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
2769; AMDGPU-DISABLED-NEXT:    br label [[OMP_IF_END]]
2770; AMDGPU-DISABLED:       omp_if.end:
2771; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
2772; AMDGPU-DISABLED-NEXT:    ret void
2773;
2774;
2775; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2776; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
2777; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2778; AMDGPU-DISABLED-NEXT:  entry:
2779; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2780; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2781; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
2782; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2783; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2784; AMDGPU-DISABLED:       user_code.entry:
2785; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2786; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2787; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit()
2788; AMDGPU-DISABLED-NEXT:    ret void
2789; AMDGPU-DISABLED:       worker.exit:
2790; AMDGPU-DISABLED-NEXT:    ret void
2791;
2792;
2793; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2794; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
2795; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2796; AMDGPU-DISABLED-NEXT:  entry:
2797; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2798; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2799; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
2800; AMDGPU-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
2801; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2802; AMDGPU-DISABLED-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
2803; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
2804; AMDGPU-DISABLED-NEXT:    ret void
2805;
2806;
2807; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2808; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
2809; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2810; AMDGPU-DISABLED-NEXT:  entry:
2811; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2812; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2813; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR11:[0-9]+]]
2814; AMDGPU-DISABLED-NEXT:    ret void
2815;
2816;
2817; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2818; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
2819; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2820; AMDGPU-DISABLED-NEXT:  entry:
2821; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2822; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2823; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2824; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2825; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2826; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2827; AMDGPU-DISABLED-NEXT:    ret void
2828;
2829;
2830; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2831; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
2832; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2833; AMDGPU-DISABLED-NEXT:  entry:
2834; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2835; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2836; AMDGPU-DISABLED-NEXT:    call void @p1() #[[ATTR11]]
2837; AMDGPU-DISABLED-NEXT:    ret void
2838;
2839;
2840; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2841; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
2842; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2843; AMDGPU-DISABLED-NEXT:  entry:
2844; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2845; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2846; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2847; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2848; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2849; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2850; AMDGPU-DISABLED-NEXT:    ret void
2851;
2852;
2853; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2854; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
2855; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2856; AMDGPU-DISABLED-NEXT:  entry:
2857; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2858; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2859; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
2860; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2861; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2862; AMDGPU-DISABLED:       user_code.entry:
2863; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2864; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2865; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit()
2866; AMDGPU-DISABLED-NEXT:    ret void
2867; AMDGPU-DISABLED:       worker.exit:
2868; AMDGPU-DISABLED-NEXT:    ret void
2869;
2870;
2871; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2872; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
2873; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2874; AMDGPU-DISABLED-NEXT:  entry:
2875; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2876; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2877; AMDGPU-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
2878; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
2879; AMDGPU-DISABLED-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
2880; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2881; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
2882; AMDGPU-DISABLED-NEXT:    ret void
2883;
2884;
2885; AMDGPU-DISABLED: Function Attrs: noinline nounwind
2886; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
2887; AMDGPU-DISABLED-SAME: () #[[ATTR6:[0-9]+]] {
2888; AMDGPU-DISABLED-NEXT:  entry:
2889; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2890; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
2891; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2892; AMDGPU-DISABLED-NEXT:    ret void
2893;
2894;
2895; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
2896; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
2897; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
2898; AMDGPU-DISABLED-NEXT:  entry:
2899; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2900; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
2901; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2902; AMDGPU-DISABLED-NEXT:    ret void
2903;
2904;
2905; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2906; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
2907; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2908; AMDGPU-DISABLED-NEXT:  entry:
2909; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2910; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2911; AMDGPU-DISABLED-NEXT:    call void @p1() #[[ATTR11]]
2912; AMDGPU-DISABLED-NEXT:    ret void
2913;
2914;
2915; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2916; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
2917; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2918; AMDGPU-DISABLED-NEXT:  entry:
2919; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2920; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2921; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2922; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2923; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2924; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2925; AMDGPU-DISABLED-NEXT:    ret void
2926;
2927;
2928; AMDGPU-DISABLED: Function Attrs: noinline nounwind
2929; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
2930; AMDGPU-DISABLED-SAME: () #[[ATTR6]] {
2931; AMDGPU-DISABLED-NEXT:  entry:
2932; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2933; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
2934; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2935; AMDGPU-DISABLED-NEXT:    ret void
2936;
2937;
2938; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
2939; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
2940; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
2941; AMDGPU-DISABLED-NEXT:  entry:
2942; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2943; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
2944; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2945; AMDGPU-DISABLED-NEXT:    ret void
2946;
2947;
2948; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2949; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
2950; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
2951; AMDGPU-DISABLED-NEXT:  entry:
2952; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2953; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2954; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
2955; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2956; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2957; AMDGPU-DISABLED:       user_code.entry:
2958; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
2959; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2960; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit()
2961; AMDGPU-DISABLED-NEXT:    ret void
2962; AMDGPU-DISABLED:       worker.exit:
2963; AMDGPU-DISABLED-NEXT:    ret void
2964;
2965;
2966; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2967; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
2968; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2969; AMDGPU-DISABLED-NEXT:  entry:
2970; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2971; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2972; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
2973; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2974; AMDGPU-DISABLED-NEXT:    [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
2975; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
2976; AMDGPU-DISABLED-NEXT:    ret void
2977;
2978;
2979; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2980; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
2981; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
2982; AMDGPU-DISABLED-NEXT:  entry:
2983; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
2984; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
2985; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR11]]
2986; AMDGPU-DISABLED-NEXT:    ret void
2987;
2988;
2989; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
2990; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2991; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
2992; AMDGPU-DISABLED-NEXT:  entry:
2993; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
2994; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2995; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2996; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2997; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2998; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
2999; AMDGPU-DISABLED-NEXT:    ret void
3000;
3001;
3002; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3003; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
3004; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3005; AMDGPU-DISABLED-NEXT:  entry:
3006; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3007; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3008; AMDGPU-DISABLED-NEXT:    call void @p1() #[[ATTR11]]
3009; AMDGPU-DISABLED-NEXT:    ret void
3010;
3011;
3012; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3013; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
3014; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3015; AMDGPU-DISABLED-NEXT:  entry:
3016; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3017; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3018; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3019; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3020; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3021; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3022; AMDGPU-DISABLED-NEXT:    ret void
3023;
3024;
3025; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3026; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
3027; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3028; AMDGPU-DISABLED-NEXT:  entry:
3029; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3030; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3031; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
3032; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3033; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3034; AMDGPU-DISABLED:       user_code.entry:
3035; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3036; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3037; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit()
3038; AMDGPU-DISABLED-NEXT:    ret void
3039; AMDGPU-DISABLED:       worker.exit:
3040; AMDGPU-DISABLED-NEXT:    ret void
3041;
3042;
3043; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3044; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
3045; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3046; AMDGPU-DISABLED-NEXT:  entry:
3047; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3048; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3049; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3050; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3051; AMDGPU-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
3052; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3053; AMDGPU-DISABLED-NEXT:    ret void
3054;
3055;
3056; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3057; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10
3058; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3059; AMDGPU-DISABLED-NEXT:  entry:
3060; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3061; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3062; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR11]]
3063; AMDGPU-DISABLED-NEXT:    ret void
3064;
3065;
3066; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3067; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
3068; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3069; AMDGPU-DISABLED-NEXT:  entry:
3070; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3071; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3072; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3073; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3074; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3075; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3076; AMDGPU-DISABLED-NEXT:    ret void
3077;
3078;
3079; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3080; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11
3081; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3082; AMDGPU-DISABLED-NEXT:  entry:
3083; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3084; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3085; AMDGPU-DISABLED-NEXT:    call void @p1() #[[ATTR11]]
3086; AMDGPU-DISABLED-NEXT:    ret void
3087;
3088;
3089; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3090; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
3091; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3092; AMDGPU-DISABLED-NEXT:  entry:
3093; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3094; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3095; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3096; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3097; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3098; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3099; AMDGPU-DISABLED-NEXT:    ret void
3100;
3101;
3102; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3103; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
3104; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3105; AMDGPU-DISABLED-NEXT:  entry:
3106; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3107; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3108; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
3109; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3110; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3111; AMDGPU-DISABLED:       user_code.entry:
3112; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3113; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3114; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit()
3115; AMDGPU-DISABLED-NEXT:    ret void
3116; AMDGPU-DISABLED:       worker.exit:
3117; AMDGPU-DISABLED-NEXT:    ret void
3118;
3119;
3120; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3121; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12
3122; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3123; AMDGPU-DISABLED-NEXT:  entry:
3124; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3125; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3126; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3127; AMDGPU-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
3128; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3129; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3130; AMDGPU-DISABLED-NEXT:    ret void
3131;
3132;
3133; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3134; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13
3135; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3136; AMDGPU-DISABLED-NEXT:  entry:
3137; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3138; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3139; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR11]]
3140; AMDGPU-DISABLED-NEXT:    ret void
3141;
3142;
3143; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3144; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
3145; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3146; AMDGPU-DISABLED-NEXT:  entry:
3147; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3148; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3149; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3150; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3151; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3152; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3153; AMDGPU-DISABLED-NEXT:    ret void
3154;
3155;
3156; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3157; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14
3158; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3159; AMDGPU-DISABLED-NEXT:  entry:
3160; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3161; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3162; AMDGPU-DISABLED-NEXT:    call void @p1() #[[ATTR11]]
3163; AMDGPU-DISABLED-NEXT:    ret void
3164;
3165;
3166; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3167; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
3168; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3169; AMDGPU-DISABLED-NEXT:  entry:
3170; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3171; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3172; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3173; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3174; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3175; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3176; AMDGPU-DISABLED-NEXT:    ret void
3177;
3178;
3179; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3180; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
3181; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3182; AMDGPU-DISABLED-NEXT:  entry:
3183; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3184; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3185; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
3186; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3187; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3188; AMDGPU-DISABLED:       user_code.entry:
3189; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3190; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3191; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit()
3192; AMDGPU-DISABLED-NEXT:    ret void
3193; AMDGPU-DISABLED:       worker.exit:
3194; AMDGPU-DISABLED-NEXT:    ret void
3195;
3196;
3197; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3198; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15
3199; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3200; AMDGPU-DISABLED-NEXT:  entry:
3201; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3202; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3203; AMDGPU-DISABLED-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
3204; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
3205; AMDGPU-DISABLED-NEXT:    ret void
3206;
3207;
3208; AMDGPU-DISABLED: Function Attrs: noinline nounwind
3209; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
3210; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
3211; AMDGPU-DISABLED-NEXT:  entry:
3212; AMDGPU-DISABLED-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
3213; AMDGPU-DISABLED-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
3214; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
3215; AMDGPU-DISABLED-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
3216; AMDGPU-DISABLED-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
3217; AMDGPU-DISABLED:       if.then:
3218; AMDGPU-DISABLED-NEXT:    br label [[RETURN:%.*]]
3219; AMDGPU-DISABLED:       if.end:
3220; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
3221; AMDGPU-DISABLED-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
3222; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
3223; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
3224; AMDGPU-DISABLED-NEXT:    br label [[RETURN]]
3225; AMDGPU-DISABLED:       return:
3226; AMDGPU-DISABLED-NEXT:    ret void
3227;
3228;
3229; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
3230; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
3231; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
3232; AMDGPU-DISABLED-NEXT:  entry:
3233; AMDGPU-DISABLED-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
3234; AMDGPU-DISABLED-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
3235; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
3236; AMDGPU-DISABLED-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
3237; AMDGPU-DISABLED-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
3238; AMDGPU-DISABLED:       if.then:
3239; AMDGPU-DISABLED-NEXT:    br label [[RETURN:%.*]]
3240; AMDGPU-DISABLED:       if.end:
3241; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
3242; AMDGPU-DISABLED-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
3243; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
3244; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
3245; AMDGPU-DISABLED-NEXT:    br label [[RETURN]]
3246; AMDGPU-DISABLED:       return:
3247; AMDGPU-DISABLED-NEXT:    ret void
3248;
3249;
3250; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3251; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
3252; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3253; AMDGPU-DISABLED-NEXT:  entry:
3254; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3255; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3256; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
3257; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3258; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3259; AMDGPU-DISABLED:       user_code.entry:
3260; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3261; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3262; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit()
3263; AMDGPU-DISABLED-NEXT:    ret void
3264; AMDGPU-DISABLED:       worker.exit:
3265; AMDGPU-DISABLED-NEXT:    ret void
3266;
3267;
3268; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3269; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16
3270; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3271; AMDGPU-DISABLED-NEXT:  entry:
3272; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3273; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3274; AMDGPU-DISABLED-NEXT:    call void @weak_callee_empty() #[[ATTR9]]
3275; AMDGPU-DISABLED-NEXT:    ret void
3276;
3277;
3278; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
3279; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty
3280; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
3281; AMDGPU-DISABLED-NEXT:  entry:
3282; AMDGPU-DISABLED-NEXT:    ret void
3283;
3284;
3285; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3286; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17
3287; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3288; AMDGPU-DISABLED-NEXT:  entry:
3289; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3290; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3291; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR11]]
3292; AMDGPU-DISABLED-NEXT:    ret void
3293;
3294;
3295; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3296; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
3297; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3298; AMDGPU-DISABLED-NEXT:  entry:
3299; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3300; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3301; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3302; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3303; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3304; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3305; AMDGPU-DISABLED-NEXT:    ret void
3306;
3307;
3308; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3309; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18
3310; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3311; AMDGPU-DISABLED-NEXT:  entry:
3312; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3313; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3314; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR11]]
3315; AMDGPU-DISABLED-NEXT:    ret void
3316;
3317;
3318; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3319; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
3320; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3321; AMDGPU-DISABLED-NEXT:  entry:
3322; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3323; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3324; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3325; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3326; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3327; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3328; AMDGPU-DISABLED-NEXT:    ret void
3329;
3330;
3331; AMDGPU-DISABLED: Function Attrs: noinline nounwind
3332; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
3333; AMDGPU-DISABLED-SAME: () #[[ATTR6]] {
3334; AMDGPU-DISABLED-NEXT:  entry:
3335; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3336; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
3337; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3338; AMDGPU-DISABLED-NEXT:    ret void
3339;
3340;
3341; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
3342; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
3343; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
3344; AMDGPU-DISABLED-NEXT:  entry:
3345; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3346; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
3347; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3348; AMDGPU-DISABLED-NEXT:    ret void
3349;
3350;
3351; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3352; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19
3353; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3354; AMDGPU-DISABLED-NEXT:  entry:
3355; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3356; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3357; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR11]]
3358; AMDGPU-DISABLED-NEXT:    ret void
3359;
3360;
3361; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3362; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
3363; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3364; AMDGPU-DISABLED-NEXT:  entry:
3365; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3366; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3367; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3368; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3369; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3370; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3371; AMDGPU-DISABLED-NEXT:    ret void
3372;
3373;
3374; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3375; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
3376; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
3377; NVPTX-DISABLED-NEXT:  entry:
3378; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3379; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3380; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
3381; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3382; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3383; NVPTX-DISABLED:       user_code.entry:
3384; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
3385; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3386; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit()
3387; NVPTX-DISABLED-NEXT:    ret void
3388; NVPTX-DISABLED:       worker.exit:
3389; NVPTX-DISABLED-NEXT:    ret void
3390;
3391;
3392; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
3393; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3394; NVPTX-DISABLED-NEXT:    ret i32 0
3395;
3396;
3397; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3398; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
3399; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3400; NVPTX-DISABLED-NEXT:  entry:
3401; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3402; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3403; NVPTX-DISABLED-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
3404; NVPTX-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
3405; NVPTX-DISABLED-NEXT:    ret void
3406;
3407;
3408; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
3409; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
3410; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
3411; NVPTX-DISABLED-NEXT:  entry:
3412; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
3413; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
3414; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
3415; NVPTX-DISABLED-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
3416; NVPTX-DISABLED:       omp_if.then:
3417; NVPTX-DISABLED-NEXT:    store i32 0, ptr @G, align 4
3418; NVPTX-DISABLED-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
3419; NVPTX-DISABLED-NEXT:    br label [[OMP_IF_END]]
3420; NVPTX-DISABLED:       omp_if.end:
3421; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
3422; NVPTX-DISABLED-NEXT:    ret void
3423;
3424;
3425; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
3426; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here
3427; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
3428; NVPTX-DISABLED-NEXT:  entry:
3429; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
3430; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
3431; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
3432; NVPTX-DISABLED-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
3433; NVPTX-DISABLED:       omp_if.then:
3434; NVPTX-DISABLED-NEXT:    store i32 0, ptr @G, align 4
3435; NVPTX-DISABLED-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
3436; NVPTX-DISABLED-NEXT:    br label [[OMP_IF_END]]
3437; NVPTX-DISABLED:       omp_if.end:
3438; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
3439; NVPTX-DISABLED-NEXT:    ret void
3440;
3441;
3442; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3443; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
3444; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3445; NVPTX-DISABLED-NEXT:  entry:
3446; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3447; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3448; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
3449; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3450; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3451; NVPTX-DISABLED:       user_code.entry:
3452; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3453; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3454; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit()
3455; NVPTX-DISABLED-NEXT:    ret void
3456; NVPTX-DISABLED:       worker.exit:
3457; NVPTX-DISABLED-NEXT:    ret void
3458;
3459;
3460; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3461; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
3462; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3463; NVPTX-DISABLED-NEXT:  entry:
3464; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3465; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3466; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3467; NVPTX-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
3468; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3469; NVPTX-DISABLED-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
3470; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3471; NVPTX-DISABLED-NEXT:    ret void
3472;
3473;
3474; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3475; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
3476; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3477; NVPTX-DISABLED-NEXT:  entry:
3478; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3479; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3480; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR11:[0-9]+]]
3481; NVPTX-DISABLED-NEXT:    ret void
3482;
3483;
3484; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3485; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
3486; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3487; NVPTX-DISABLED-NEXT:  entry:
3488; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3489; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3490; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3491; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3492; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3493; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3494; NVPTX-DISABLED-NEXT:    ret void
3495;
3496;
3497; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3498; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
3499; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3500; NVPTX-DISABLED-NEXT:  entry:
3501; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3502; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3503; NVPTX-DISABLED-NEXT:    call void @p1() #[[ATTR11]]
3504; NVPTX-DISABLED-NEXT:    ret void
3505;
3506;
3507; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3508; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
3509; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3510; NVPTX-DISABLED-NEXT:  entry:
3511; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3512; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3513; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3514; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3515; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3516; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3517; NVPTX-DISABLED-NEXT:    ret void
3518;
3519;
3520; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3521; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
3522; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3523; NVPTX-DISABLED-NEXT:  entry:
3524; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3525; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3526; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
3527; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3528; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3529; NVPTX-DISABLED:       user_code.entry:
3530; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3531; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3532; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit()
3533; NVPTX-DISABLED-NEXT:    ret void
3534; NVPTX-DISABLED:       worker.exit:
3535; NVPTX-DISABLED-NEXT:    ret void
3536;
3537;
3538; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3539; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
3540; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3541; NVPTX-DISABLED-NEXT:  entry:
3542; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3543; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3544; NVPTX-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
3545; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
3546; NVPTX-DISABLED-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
3547; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3548; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
3549; NVPTX-DISABLED-NEXT:    ret void
3550;
3551;
3552; NVPTX-DISABLED: Function Attrs: noinline nounwind
3553; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
3554; NVPTX-DISABLED-SAME: () #[[ATTR6:[0-9]+]] {
3555; NVPTX-DISABLED-NEXT:  entry:
3556; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3557; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
3558; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3559; NVPTX-DISABLED-NEXT:    ret void
3560;
3561;
3562; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
3563; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
3564; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
3565; NVPTX-DISABLED-NEXT:  entry:
3566; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3567; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
3568; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3569; NVPTX-DISABLED-NEXT:    ret void
3570;
3571;
3572; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3573; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
3574; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3575; NVPTX-DISABLED-NEXT:  entry:
3576; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3577; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3578; NVPTX-DISABLED-NEXT:    call void @p1() #[[ATTR11]]
3579; NVPTX-DISABLED-NEXT:    ret void
3580;
3581;
3582; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3583; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
3584; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3585; NVPTX-DISABLED-NEXT:  entry:
3586; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3587; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3588; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3589; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3590; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3591; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3592; NVPTX-DISABLED-NEXT:    ret void
3593;
3594;
3595; NVPTX-DISABLED: Function Attrs: noinline nounwind
3596; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
3597; NVPTX-DISABLED-SAME: () #[[ATTR6]] {
3598; NVPTX-DISABLED-NEXT:  entry:
3599; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3600; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
3601; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3602; NVPTX-DISABLED-NEXT:    ret void
3603;
3604;
3605; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
3606; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
3607; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
3608; NVPTX-DISABLED-NEXT:  entry:
3609; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3610; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
3611; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3612; NVPTX-DISABLED-NEXT:    ret void
3613;
3614;
3615; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3616; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
3617; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3618; NVPTX-DISABLED-NEXT:  entry:
3619; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3620; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3621; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
3622; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3623; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3624; NVPTX-DISABLED:       user_code.entry:
3625; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3626; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3627; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit()
3628; NVPTX-DISABLED-NEXT:    ret void
3629; NVPTX-DISABLED:       worker.exit:
3630; NVPTX-DISABLED-NEXT:    ret void
3631;
3632;
3633; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3634; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
3635; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3636; NVPTX-DISABLED-NEXT:  entry:
3637; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3638; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3639; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3640; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3641; NVPTX-DISABLED-NEXT:    [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
3642; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3643; NVPTX-DISABLED-NEXT:    ret void
3644;
3645;
3646; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3647; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
3648; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3649; NVPTX-DISABLED-NEXT:  entry:
3650; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3651; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3652; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR11]]
3653; NVPTX-DISABLED-NEXT:    ret void
3654;
3655;
3656; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3657; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
3658; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3659; NVPTX-DISABLED-NEXT:  entry:
3660; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3661; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3662; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3663; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3664; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3665; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3666; NVPTX-DISABLED-NEXT:    ret void
3667;
3668;
3669; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3670; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
3671; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3672; NVPTX-DISABLED-NEXT:  entry:
3673; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3674; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3675; NVPTX-DISABLED-NEXT:    call void @p1() #[[ATTR11]]
3676; NVPTX-DISABLED-NEXT:    ret void
3677;
3678;
3679; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3680; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
3681; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3682; NVPTX-DISABLED-NEXT:  entry:
3683; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3684; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3685; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3686; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3687; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3688; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3689; NVPTX-DISABLED-NEXT:    ret void
3690;
3691;
3692; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3693; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
3694; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3695; NVPTX-DISABLED-NEXT:  entry:
3696; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3697; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3698; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
3699; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3700; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3701; NVPTX-DISABLED:       user_code.entry:
3702; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3703; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3704; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit()
3705; NVPTX-DISABLED-NEXT:    ret void
3706; NVPTX-DISABLED:       worker.exit:
3707; NVPTX-DISABLED-NEXT:    ret void
3708;
3709;
3710; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3711; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
3712; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3713; NVPTX-DISABLED-NEXT:  entry:
3714; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3715; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3716; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3717; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3718; NVPTX-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
3719; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3720; NVPTX-DISABLED-NEXT:    ret void
3721;
3722;
3723; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3724; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10
3725; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3726; NVPTX-DISABLED-NEXT:  entry:
3727; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3728; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3729; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR11]]
3730; NVPTX-DISABLED-NEXT:    ret void
3731;
3732;
3733; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3734; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
3735; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3736; NVPTX-DISABLED-NEXT:  entry:
3737; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3738; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3739; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3740; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3741; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3742; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3743; NVPTX-DISABLED-NEXT:    ret void
3744;
3745;
3746; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3747; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11
3748; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3749; NVPTX-DISABLED-NEXT:  entry:
3750; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3751; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3752; NVPTX-DISABLED-NEXT:    call void @p1() #[[ATTR11]]
3753; NVPTX-DISABLED-NEXT:    ret void
3754;
3755;
3756; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3757; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
3758; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3759; NVPTX-DISABLED-NEXT:  entry:
3760; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3761; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3762; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3763; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3764; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3765; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3766; NVPTX-DISABLED-NEXT:    ret void
3767;
3768;
3769; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3770; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
3771; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3772; NVPTX-DISABLED-NEXT:  entry:
3773; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3774; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3775; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
3776; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3777; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3778; NVPTX-DISABLED:       user_code.entry:
3779; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3780; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3781; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit()
3782; NVPTX-DISABLED-NEXT:    ret void
3783; NVPTX-DISABLED:       worker.exit:
3784; NVPTX-DISABLED-NEXT:    ret void
3785;
3786;
3787; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3788; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12
3789; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3790; NVPTX-DISABLED-NEXT:  entry:
3791; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3792; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
3793; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
3794; NVPTX-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR10]]
3795; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3796; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
3797; NVPTX-DISABLED-NEXT:    ret void
3798;
3799;
3800; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3801; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13
3802; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3803; NVPTX-DISABLED-NEXT:  entry:
3804; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3805; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3806; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR11]]
3807; NVPTX-DISABLED-NEXT:    ret void
3808;
3809;
3810; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3811; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
3812; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3813; NVPTX-DISABLED-NEXT:  entry:
3814; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3815; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3816; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3817; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3818; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3819; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3820; NVPTX-DISABLED-NEXT:    ret void
3821;
3822;
3823; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3824; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14
3825; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3826; NVPTX-DISABLED-NEXT:  entry:
3827; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3828; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3829; NVPTX-DISABLED-NEXT:    call void @p1() #[[ATTR11]]
3830; NVPTX-DISABLED-NEXT:    ret void
3831;
3832;
3833; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3834; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
3835; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3836; NVPTX-DISABLED-NEXT:  entry:
3837; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3838; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3839; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3840; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3841; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3842; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3843; NVPTX-DISABLED-NEXT:    ret void
3844;
3845;
3846; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3847; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
3848; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3849; NVPTX-DISABLED-NEXT:  entry:
3850; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3851; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3852; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
3853; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3854; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3855; NVPTX-DISABLED:       user_code.entry:
3856; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3857; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3858; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit()
3859; NVPTX-DISABLED-NEXT:    ret void
3860; NVPTX-DISABLED:       worker.exit:
3861; NVPTX-DISABLED-NEXT:    ret void
3862;
3863;
3864; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3865; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15
3866; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3867; NVPTX-DISABLED-NEXT:  entry:
3868; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3869; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3870; NVPTX-DISABLED-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
3871; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
3872; NVPTX-DISABLED-NEXT:    ret void
3873;
3874;
3875; NVPTX-DISABLED: Function Attrs: noinline nounwind
3876; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
3877; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
3878; NVPTX-DISABLED-NEXT:  entry:
3879; NVPTX-DISABLED-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
3880; NVPTX-DISABLED-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
3881; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
3882; NVPTX-DISABLED-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
3883; NVPTX-DISABLED-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
3884; NVPTX-DISABLED:       if.then:
3885; NVPTX-DISABLED-NEXT:    br label [[RETURN:%.*]]
3886; NVPTX-DISABLED:       if.end:
3887; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
3888; NVPTX-DISABLED-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
3889; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
3890; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
3891; NVPTX-DISABLED-NEXT:    br label [[RETURN]]
3892; NVPTX-DISABLED:       return:
3893; NVPTX-DISABLED-NEXT:    ret void
3894;
3895;
3896; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
3897; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
3898; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
3899; NVPTX-DISABLED-NEXT:  entry:
3900; NVPTX-DISABLED-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
3901; NVPTX-DISABLED-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
3902; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
3903; NVPTX-DISABLED-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
3904; NVPTX-DISABLED-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
3905; NVPTX-DISABLED:       if.then:
3906; NVPTX-DISABLED-NEXT:    br label [[RETURN:%.*]]
3907; NVPTX-DISABLED:       if.end:
3908; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
3909; NVPTX-DISABLED-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
3910; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
3911; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
3912; NVPTX-DISABLED-NEXT:    br label [[RETURN]]
3913; NVPTX-DISABLED:       return:
3914; NVPTX-DISABLED-NEXT:    ret void
3915;
3916;
3917; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3918; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
3919; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
3920; NVPTX-DISABLED-NEXT:  entry:
3921; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3922; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
3923; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
3924; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3925; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
3926; NVPTX-DISABLED:       user_code.entry:
3927; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
3928; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3929; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit()
3930; NVPTX-DISABLED-NEXT:    ret void
3931; NVPTX-DISABLED:       worker.exit:
3932; NVPTX-DISABLED-NEXT:    ret void
3933;
3934;
3935; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3936; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16
3937; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3938; NVPTX-DISABLED-NEXT:  entry:
3939; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3940; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3941; NVPTX-DISABLED-NEXT:    call void @weak_callee_empty() #[[ATTR9]]
3942; NVPTX-DISABLED-NEXT:    ret void
3943;
3944;
3945; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
3946; NVPTX-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty
3947; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
3948; NVPTX-DISABLED-NEXT:  entry:
3949; NVPTX-DISABLED-NEXT:    ret void
3950;
3951;
3952; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3953; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17
3954; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3955; NVPTX-DISABLED-NEXT:  entry:
3956; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3957; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3958; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR11]]
3959; NVPTX-DISABLED-NEXT:    ret void
3960;
3961;
3962; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3963; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
3964; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3965; NVPTX-DISABLED-NEXT:  entry:
3966; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3967; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3968; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3969; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3970; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3971; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3972; NVPTX-DISABLED-NEXT:    ret void
3973;
3974;
3975; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3976; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18
3977; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
3978; NVPTX-DISABLED-NEXT:  entry:
3979; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
3980; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
3981; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR11]]
3982; NVPTX-DISABLED-NEXT:    ret void
3983;
3984;
3985; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
3986; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
3987; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
3988; NVPTX-DISABLED-NEXT:  entry:
3989; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
3990; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3991; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3992; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3993; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3994; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
3995; NVPTX-DISABLED-NEXT:    ret void
3996;
3997;
3998; NVPTX-DISABLED: Function Attrs: noinline nounwind
3999; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
4000; NVPTX-DISABLED-SAME: () #[[ATTR6]] {
4001; NVPTX-DISABLED-NEXT:  entry:
4002; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4003; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
4004; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4005; NVPTX-DISABLED-NEXT:    ret void
4006;
4007;
4008; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
4009; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
4010; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
4011; NVPTX-DISABLED-NEXT:  entry:
4012; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
4013; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
4014; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
4015; NVPTX-DISABLED-NEXT:    ret void
4016;
4017;
4018; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
4019; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19
4020; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
4021; NVPTX-DISABLED-NEXT:  entry:
4022; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
4023; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
4024; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR11]]
4025; NVPTX-DISABLED-NEXT:    ret void
4026;
4027;
4028; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
4029; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
4030; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
4031; NVPTX-DISABLED-NEXT:  entry:
4032; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
4033; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
4034; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
4035; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
4036; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
4037; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
4038; NVPTX-DISABLED-NEXT:    ret void
4039;
4040;.
4041; AMDGPU: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4042; AMDGPU: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4043; AMDGPU: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4044; AMDGPU: attributes #[[ATTR3]] = { nounwind }
4045; AMDGPU: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4046; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
4047; AMDGPU: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4048; AMDGPU: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4049; AMDGPU: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4050; AMDGPU: attributes #[[ATTR9]] = { convergent nounwind }
4051; AMDGPU: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
4052; AMDGPU: attributes #[[ATTR11]] = { convergent }
4053;.
4054; NVPTX: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4055; NVPTX: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4056; NVPTX: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4057; NVPTX: attributes #[[ATTR3]] = { nounwind }
4058; NVPTX: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4059; NVPTX: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
4060; NVPTX: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4061; NVPTX: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4062; NVPTX: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4063; NVPTX: attributes #[[ATTR9]] = { convergent nounwind }
4064; NVPTX: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
4065; NVPTX: attributes #[[ATTR11]] = { convergent }
4066;.
4067; AMDGPU-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4068; AMDGPU-DISABLED: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4069; AMDGPU-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4070; AMDGPU-DISABLED: attributes #[[ATTR3]] = { nounwind }
4071; AMDGPU-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4072; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
4073; AMDGPU-DISABLED: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4074; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4075; AMDGPU-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4076; AMDGPU-DISABLED: attributes #[[ATTR9]] = { convergent nounwind }
4077; AMDGPU-DISABLED: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
4078; AMDGPU-DISABLED: attributes #[[ATTR11]] = { convergent }
4079;.
4080; NVPTX-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4081; NVPTX-DISABLED: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4082; NVPTX-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4083; NVPTX-DISABLED: attributes #[[ATTR3]] = { nounwind }
4084; NVPTX-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4085; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
4086; NVPTX-DISABLED: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4087; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4088; NVPTX-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
4089; NVPTX-DISABLED: attributes #[[ATTR9]] = { convergent nounwind }
4090; NVPTX-DISABLED: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
4091; NVPTX-DISABLED: attributes #[[ATTR11]] = { convergent }
4092;.
4093; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
4094; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
4095; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
4096; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
4097; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
4098; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
4099; AMDGPU: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
4100; AMDGPU: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
4101; AMDGPU: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
4102; AMDGPU: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
4103; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
4104;.
4105; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
4106; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
4107; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
4108; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
4109; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
4110; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
4111; NVPTX: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
4112; NVPTX: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
4113; NVPTX: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
4114; NVPTX: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
4115; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
4116;.
4117; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
4118; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
4119; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
4120; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
4121; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
4122; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
4123; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
4124; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
4125; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
4126; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
4127; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
4128;.
4129; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
4130; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
4131; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
4132; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
4133; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
4134; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
4135; NVPTX-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
4136; NVPTX-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
4137; NVPTX-DISABLED: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
4138; NVPTX-DISABLED: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50}
4139; NVPTX-DISABLED: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
4140;.
4141