; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED1
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED2
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED1
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX-DISABLED2

;; void unknown(void);
;; [[omp::assume("ompx_spmd_amenable")]] void spmd_amenable(void);
;;
;; void sequential_loop() {
;;   #pragma omp target teams
;;   {
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       {
;;         unknown();
;;       }
;;     }
;;     spmd_amenable();
;;   }
;; }
;;
;; [[omp::assume("ompx_spmd_amenable")]] void use(__attribute__((noescape)) int *);
;;
;; void sequential_loop_to_stack_var() {
;;   #pragma omp target teams
;;   {
;;     int x;
;;     use(&x);
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       {
;;         unknown();
;;       }
;;     }
;;     spmd_amenable();
;;   }
;; }
;;
;; void sequential_loop_to_shared_var() {
;;   #pragma omp target teams
;;   {
;;     int x;
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       {
;;         x++;
;;         unknown();
;;       }
;;     }
;;     spmd_amenable();
;;   }
;; }
;;
;; void sequential_loop_to_shared_var_guarded() {
;;   #pragma omp target teams
;;   {
;;     int x = 42;
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       {
;;         x++;
;;         unknown();
;;       }
;;     }
;;     spmd_amenable();
;;   }
;; }
;;
;; void do_not_spmdize_target() {
;; #pragma omp target teams
;;     {
;;         // Incompatible parallel level, called both
;;         // from parallel and target regions
;;         unknown();
;;     }
;; }
;;
;; void do_not_spmdize_task() {
;; #pragma omp target
;;     {
;;         #pragma omp task
;;             spmd_amenable();
;;         #pragma omp parallel
;;             unknown();
;;     }
;; }

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.kmp_task_t_with_privates = type { %struct.kmp_task_t }
%struct.kmp_task_t = type { ptr, ptr, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t }
%union.kmp_cmplrdata_t = type { ptr }
%struct.anon = type {}
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }

@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }

;.
; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; AMDGPU: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
; AMDGPU: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU: @x_shared.1 = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU: @__omp_outlined__9_wrapper.ID = private constant i8 undef
;.
; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
; NVPTX: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX: @__omp_outlined__9_wrapper.ID = private constant i8 undef
;.
; AMDGPU-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED1: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU-DISABLED1: @x_shared.1 = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU-DISABLED1: @__omp_outlined__1_wrapper.ID = private constant i8 undef
; AMDGPU-DISABLED1: @__omp_outlined__3_wrapper.ID = private constant i8 undef
; AMDGPU-DISABLED1: @__omp_outlined__5_wrapper.ID = private constant i8 undef
; AMDGPU-DISABLED1: @__omp_outlined__7_wrapper.ID = private constant i8 undef
; AMDGPU-DISABLED1: @__omp_outlined__9_wrapper.ID = private constant i8 undef
;.
; AMDGPU-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU-DISABLED2: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU-DISABLED2: @x_shared.1 = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU-DISABLED2: @__omp_outlined__1_wrapper.ID = private constant i8 undef
; AMDGPU-DISABLED2: @__omp_outlined__3_wrapper.ID = private constant i8 undef
; AMDGPU-DISABLED2: @__omp_outlined__5_wrapper.ID = private constant i8 undef
; AMDGPU-DISABLED2: @__omp_outlined__7_wrapper.ID = private constant i8 undef
; AMDGPU-DISABLED2: @__omp_outlined__9_wrapper.ID = private constant i8 undef
;.
; NVPTX-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; NVPTX-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED1: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX-DISABLED1: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX-DISABLED1: @__omp_outlined__1_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED1: @__omp_outlined__3_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED1: @__omp_outlined__5_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED1: @__omp_outlined__7_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED1: @__omp_outlined__9_wrapper.ID = private constant i8 undef
;.
; NVPTX-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; NVPTX-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; NVPTX-DISABLED2: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX-DISABLED2: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX-DISABLED2: @__omp_outlined__1_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED2: @__omp_outlined__3_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED2: @__omp_outlined__5_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED2: @__omp_outlined__7_wrapper.ID = private constant i8 undef
; NVPTX-DISABLED2: @__omp_outlined__9_wrapper.ID = private constant i8 undef
;.
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] {
; AMDGPU-NEXT:    call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-NEXT:    ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; NVPTX-SAME: () #[[ATTR0:[0-9]+]] {
; NVPTX-NEXT:    call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-NEXT:    ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; AMDGPU-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] {
; AMDGPU-DISABLED1-NEXT:    call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED1-NEXT:    ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; AMDGPU-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
; AMDGPU-DISABLED2-NEXT:    call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED2-NEXT:    ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; NVPTX-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] {
; NVPTX-DISABLED1-NEXT:    call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED1-NEXT:    ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; NVPTX-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
; NVPTX-DISABLED2-NEXT:    call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED2-NEXT:    ret void
  call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
  ret void
}

define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-NEXT:  entry:
; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU:       common.ret:
; AMDGPU-NEXT:    ret void
; AMDGPU:       user_code.entry:
; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
; AMDGPU-NEXT:    br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-NEXT:  entry:
; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX:       common.ret:
; NVPTX-NEXT:    ret void
; NVPTX:       user_code.entry:
; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT:    call void @__kmpc_target_deinit()
; NVPTX-NEXT:    br label [[COMMON_RET]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; AMDGPU-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-DISABLED1-NEXT:  entry:
; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED1:       is_worker_check:
; AMDGPU-DISABLED1-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED1-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED1-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED1-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-DISABLED1-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU-DISABLED1:       worker_state_machine.begin:
; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED1-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED1-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU-DISABLED1:       worker_state_machine.finished:
; AMDGPU-DISABLED1-NEXT:    ret void
; AMDGPU-DISABLED1:       worker_state_machine.is_active.check:
; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.check:
; AMDGPU-DISABLED1-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.execute:
; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.check1:
; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_kernel_end_parallel()
; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU-DISABLED1:       worker_state_machine.done.barrier:
; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU-DISABLED1:       thread.user_code.check:
; AMDGPU-DISABLED1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED1:       common.ret:
; AMDGPU-DISABLED1-NEXT:    ret void
; AMDGPU-DISABLED1:       user_code.entry:
; AMDGPU-DISABLED1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; AMDGPU-DISABLED1-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT:    br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; AMDGPU-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-DISABLED2-NEXT:  entry:
; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED2:       is_worker_check:
; AMDGPU-DISABLED2-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED2-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED2-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED2-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-DISABLED2-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU-DISABLED2:       worker_state_machine.begin:
; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-DISABLED2-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED2-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU-DISABLED2:       worker_state_machine.finished:
; AMDGPU-DISABLED2-NEXT:    ret void
; AMDGPU-DISABLED2:       worker_state_machine.is_active.check:
; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.check:
; AMDGPU-DISABLED2-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.execute:
; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.check1:
; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_kernel_end_parallel()
; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU-DISABLED2:       worker_state_machine.done.barrier:
; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU-DISABLED2:       thread.user_code.check:
; AMDGPU-DISABLED2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED2:       common.ret:
; AMDGPU-DISABLED2-NEXT:    ret void
; AMDGPU-DISABLED2:       user_code.entry:
; AMDGPU-DISABLED2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; AMDGPU-DISABLED2-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT:    br label [[COMMON_RET]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-DISABLED1-NEXT:  entry:
; NVPTX-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED1:       is_worker_check:
; NVPTX-DISABLED1-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED1-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED1-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED1-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-DISABLED1-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1:       worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED1-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED1-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1:       worker_state_machine.finished:
; NVPTX-DISABLED1-NEXT:    ret void
; NVPTX-DISABLED1:       worker_state_machine.is_active.check:
; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX-DISABLED1:       worker_state_machine.parallel_region.check:
; NVPTX-DISABLED1-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; NVPTX-DISABLED1:       worker_state_machine.parallel_region.execute:
; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED1:       worker_state_machine.parallel_region.check1:
; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX-DISABLED1:       worker_state_machine.parallel_region.end:
; NVPTX-DISABLED1-NEXT:    call void @__kmpc_kernel_end_parallel()
; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX-DISABLED1:       worker_state_machine.done.barrier:
; NVPTX-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX-DISABLED1:       thread.user_code.check:
; NVPTX-DISABLED1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED1:       common.ret:
; NVPTX-DISABLED1-NEXT:    ret void
; NVPTX-DISABLED1:       user_code.entry:
; NVPTX-DISABLED1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; NVPTX-DISABLED1-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT:    call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT:    br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-DISABLED2-NEXT:  entry:
; NVPTX-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED2:       is_worker_check:
; NVPTX-DISABLED2-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED2-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED2-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED2-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-DISABLED2-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED2:       worker_state_machine.begin:
; NVPTX-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED2-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED2-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED2-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED2:       worker_state_machine.finished:
; NVPTX-DISABLED2-NEXT:    ret void
; NVPTX-DISABLED2:       worker_state_machine.is_active.check:
; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX-DISABLED2:       worker_state_machine.parallel_region.check:
; NVPTX-DISABLED2-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; NVPTX-DISABLED2:       worker_state_machine.parallel_region.execute:
; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED2:       worker_state_machine.parallel_region.check1:
; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX-DISABLED2:       worker_state_machine.parallel_region.end:
; NVPTX-DISABLED2-NEXT:    call void @__kmpc_kernel_end_parallel()
; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX-DISABLED2:       worker_state_machine.done.barrier:
; NVPTX-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX-DISABLED2:       thread.user_code.check:
; NVPTX-DISABLED2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED2:       common.ret:
; NVPTX-DISABLED2-NEXT:    ret void
; NVPTX-DISABLED2:       user_code.entry:
; NVPTX-DISABLED2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; NVPTX-DISABLED2-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT:    br label [[COMMON_RET]]
entry:
  %.zero.addr = alloca i32, align 4
  %.threadid_temp. = alloca i32, align 4
  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %common.ret

common.ret:                                       ; preds = %entry, %user_code.entry
  ret void

user_code.entry:                                  ; preds = %entry
  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
  store i32 0, ptr %.zero.addr, align 4
  store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
  call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #6
  call void @__kmpc_target_deinit()
  br label %common.ret
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT:  entry:
; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT:    br label [[FOR_COND:%.*]]
; AMDGPU:       for.cond:
; AMDGPU-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU:       for.cond.cleanup:
; AMDGPU-NEXT:    call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-NEXT:    ret void
; AMDGPU:       for.body:
; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT:  entry:
; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT:    br label [[FOR_COND:%.*]]
; NVPTX:       for.cond:
; NVPTX-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX:       for.cond.cleanup:
; NVPTX-NEXT:    call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-NEXT:    ret void
; NVPTX:       for.body:
; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED1-NEXT:  entry:
; AMDGPU-DISABLED1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED1-NEXT:    br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED1:       for.cond:
; AMDGPU-DISABLED1-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED1-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED1:       for.cond.cleanup:
; AMDGPU-DISABLED1-NEXT:    call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED1-NEXT:    ret void
; AMDGPU-DISABLED1:       for.body:
; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED1-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED2-NEXT:  entry:
; AMDGPU-DISABLED2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED2-NEXT:    br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED2:       for.cond:
; AMDGPU-DISABLED2-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED2-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED2:       for.cond.cleanup:
; AMDGPU-DISABLED2-NEXT:    call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED2-NEXT:    ret void
; AMDGPU-DISABLED2:       for.body:
; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED2-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED1-NEXT:  entry:
; NVPTX-DISABLED1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED1-NEXT:    br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1:       for.cond:
; NVPTX-DISABLED1-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED1-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED1:       for.cond.cleanup:
; NVPTX-DISABLED1-NEXT:    call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED1-NEXT:    ret void
; NVPTX-DISABLED1:       for.body:
; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED1-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT:  entry:
; NVPTX-DISABLED2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED2-NEXT:    br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2:       for.cond:
; NVPTX-DISABLED2-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED2-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED2:       for.cond.cleanup:
; NVPTX-DISABLED2-NEXT:    call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED2-NEXT:    ret void
; NVPTX-DISABLED2:       for.body:
; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED2-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
entry:
  %captured_vars_addrs = alloca [0 x ptr], align 8
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, 100
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond
  call void @spmd_amenable() #10
  ret void

for.body:                                         ; preds = %for.cond
  %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
  call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs, i64 0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond, !llvm.loop !22
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT:  entry:
; AMDGPU-NEXT:    call void @unknown() #[[ATTR8:[0-9]+]]
; AMDGPU-NEXT:    ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-NEXT:  entry:
; NVPTX-NEXT:    call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-NEXT:    ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED1-NEXT:  entry:
; AMDGPU-DISABLED1-NEXT:    call void @unknown() #[[ATTR8:[0-9]+]]
; AMDGPU-DISABLED1-NEXT:    ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED2-NEXT:  entry:
; AMDGPU-DISABLED2-NEXT:    call void @unknown() #[[ATTR8:[0-9]+]]
; AMDGPU-DISABLED2-NEXT:    ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED1-NEXT:  entry:
; NVPTX-DISABLED1-NEXT:    call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-DISABLED1-NEXT:    ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED2-NEXT:  entry:
; NVPTX-DISABLED2-NEXT:    call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-DISABLED2-NEXT:    ret void
entry:
  call void @unknown() #11
  ret void
}

659; Function Attrs: convergent norecurse nounwind
660define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
661; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
662; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
663; AMDGPU-NEXT:  entry:
664; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
665; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
666; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
667; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
668; AMDGPU-NEXT:    call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
669; AMDGPU-NEXT:    ret void
670;
671; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
672; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
673; NVPTX-NEXT:  entry:
674; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
675; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
676; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
677; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
678; NVPTX-NEXT:    call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
679; NVPTX-NEXT:    ret void
680;
681; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
682; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
683; AMDGPU-DISABLED1-NEXT:  entry:
684; AMDGPU-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
685; AMDGPU-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
686; AMDGPU-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
687; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
688; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
689; AMDGPU-DISABLED1-NEXT:    ret void
690;
691; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
692; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
693; AMDGPU-DISABLED2-NEXT:  entry:
694; AMDGPU-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
695; AMDGPU-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
696; AMDGPU-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
697; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
698; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
699; AMDGPU-DISABLED2-NEXT:    ret void
700;
701; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
702; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
703; NVPTX-DISABLED1-NEXT:  entry:
704; NVPTX-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
705; NVPTX-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
706; NVPTX-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
707; NVPTX-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
708; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
709; NVPTX-DISABLED1-NEXT:    ret void
710;
711; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
712; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
713; NVPTX-DISABLED2-NEXT:  entry:
714; NVPTX-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
715; NVPTX-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
716; NVPTX-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
717; NVPTX-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
718; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
719; NVPTX-DISABLED2-NEXT:    ret void
720entry:
721  %.addr1 = alloca i32, align 4
722  %.zero.addr = alloca i32, align 4
723  %global_args = alloca ptr, align 8
724  store i32 %1, ptr %.addr1, align 4, !tbaa !18
725  store i32 0, ptr %.zero.addr, align 4
726  call void @__kmpc_get_shared_variables(ptr %global_args)
727  call void @__omp_outlined__1(ptr %.addr1, ptr %.zero.addr) #6
728  ret void
729}
730
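; For the 'sequential_loop_to_stack_var' kernel below, the AMDGPU and NVPTX
; runs expect SPMD execution (a plain user_code.entry guarded by
; __kmpc_target_init), while the *-DISABLED runs keep the generic-mode worker
; state machine that dispatches to __omp_outlined__3_wrapper.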
731; Function Attrs: alwaysinline convergent norecurse nounwind
732define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 {
733; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
734; AMDGPU-SAME: () #[[ATTR0]] {
735; AMDGPU-NEXT:  entry:
736; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
737; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
738; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
739; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
740; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
741; AMDGPU:       common.ret:
742; AMDGPU-NEXT:    ret void
743; AMDGPU:       user_code.entry:
744; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
745; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
746; AMDGPU-NEXT:    call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
747; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
748; AMDGPU-NEXT:    br label [[COMMON_RET]]
749;
750; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
751; NVPTX-SAME: () #[[ATTR0]] {
752; NVPTX-NEXT:  entry:
753; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
754; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
755; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
756; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
757; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
758; NVPTX:       common.ret:
759; NVPTX-NEXT:    ret void
760; NVPTX:       user_code.entry:
761; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
762; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
763; NVPTX-NEXT:    call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
764; NVPTX-NEXT:    call void @__kmpc_target_deinit()
765; NVPTX-NEXT:    br label [[COMMON_RET]]
766;
767; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
768; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
769; AMDGPU-DISABLED1-NEXT:  entry:
770; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
771; AMDGPU-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
772; AMDGPU-DISABLED1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
773; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
774; AMDGPU-DISABLED1-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
775; AMDGPU-DISABLED1-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
776; AMDGPU-DISABLED1:       is_worker_check:
777; AMDGPU-DISABLED1-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
778; AMDGPU-DISABLED1-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
779; AMDGPU-DISABLED1-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
780; AMDGPU-DISABLED1-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
781; AMDGPU-DISABLED1-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
782; AMDGPU-DISABLED1:       worker_state_machine.begin:
783; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
784; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
785; AMDGPU-DISABLED1-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
786; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
787; AMDGPU-DISABLED1-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
788; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
789; AMDGPU-DISABLED1:       worker_state_machine.finished:
790; AMDGPU-DISABLED1-NEXT:    ret void
791; AMDGPU-DISABLED1:       worker_state_machine.is_active.check:
792; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
793; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.check:
794; AMDGPU-DISABLED1-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
795; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.execute:
796; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
797; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
798; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.check1:
799; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
800; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.end:
801; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_kernel_end_parallel()
802; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
803; AMDGPU-DISABLED1:       worker_state_machine.done.barrier:
804; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
805; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
806; AMDGPU-DISABLED1:       thread.user_code.check:
807; AMDGPU-DISABLED1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
808; AMDGPU-DISABLED1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
809; AMDGPU-DISABLED1:       common.ret:
810; AMDGPU-DISABLED1-NEXT:    ret void
811; AMDGPU-DISABLED1:       user_code.entry:
812; AMDGPU-DISABLED1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
813; AMDGPU-DISABLED1-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
814; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
815; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_target_deinit()
816; AMDGPU-DISABLED1-NEXT:    br label [[COMMON_RET]]
817;
818; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
819; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
820; AMDGPU-DISABLED2-NEXT:  entry:
821; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
822; AMDGPU-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
823; AMDGPU-DISABLED2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
824; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
825; AMDGPU-DISABLED2-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
826; AMDGPU-DISABLED2-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
827; AMDGPU-DISABLED2:       is_worker_check:
828; AMDGPU-DISABLED2-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
829; AMDGPU-DISABLED2-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
830; AMDGPU-DISABLED2-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
831; AMDGPU-DISABLED2-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
832; AMDGPU-DISABLED2-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
833; AMDGPU-DISABLED2:       worker_state_machine.begin:
834; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
835; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
836; AMDGPU-DISABLED2-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
837; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
838; AMDGPU-DISABLED2-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
839; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
840; AMDGPU-DISABLED2:       worker_state_machine.finished:
841; AMDGPU-DISABLED2-NEXT:    ret void
842; AMDGPU-DISABLED2:       worker_state_machine.is_active.check:
843; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
844; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.check:
845; AMDGPU-DISABLED2-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
846; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.execute:
847; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
848; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
849; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.check1:
850; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
851; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.end:
852; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_kernel_end_parallel()
853; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
854; AMDGPU-DISABLED2:       worker_state_machine.done.barrier:
855; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
856; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
857; AMDGPU-DISABLED2:       thread.user_code.check:
858; AMDGPU-DISABLED2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
859; AMDGPU-DISABLED2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
860; AMDGPU-DISABLED2:       common.ret:
861; AMDGPU-DISABLED2-NEXT:    ret void
862; AMDGPU-DISABLED2:       user_code.entry:
863; AMDGPU-DISABLED2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
864; AMDGPU-DISABLED2-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
865; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
866; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
867; AMDGPU-DISABLED2-NEXT:    br label [[COMMON_RET]]
868;
869; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
870; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
871; NVPTX-DISABLED1-NEXT:  entry:
872; NVPTX-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
873; NVPTX-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
874; NVPTX-DISABLED1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
875; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
876; NVPTX-DISABLED1-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
877; NVPTX-DISABLED1-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
878; NVPTX-DISABLED1:       is_worker_check:
879; NVPTX-DISABLED1-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
880; NVPTX-DISABLED1-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
881; NVPTX-DISABLED1-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
882; NVPTX-DISABLED1-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
883; NVPTX-DISABLED1-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
884; NVPTX-DISABLED1:       worker_state_machine.begin:
885; NVPTX-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
886; NVPTX-DISABLED1-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
887; NVPTX-DISABLED1-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
888; NVPTX-DISABLED1-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
889; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
890; NVPTX-DISABLED1:       worker_state_machine.finished:
891; NVPTX-DISABLED1-NEXT:    ret void
892; NVPTX-DISABLED1:       worker_state_machine.is_active.check:
893; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
894; NVPTX-DISABLED1:       worker_state_machine.parallel_region.check:
895; NVPTX-DISABLED1-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
896; NVPTX-DISABLED1:       worker_state_machine.parallel_region.execute:
897; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
898; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
899; NVPTX-DISABLED1:       worker_state_machine.parallel_region.check1:
900; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
901; NVPTX-DISABLED1:       worker_state_machine.parallel_region.end:
902; NVPTX-DISABLED1-NEXT:    call void @__kmpc_kernel_end_parallel()
903; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
904; NVPTX-DISABLED1:       worker_state_machine.done.barrier:
905; NVPTX-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
906; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
907; NVPTX-DISABLED1:       thread.user_code.check:
908; NVPTX-DISABLED1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
909; NVPTX-DISABLED1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
910; NVPTX-DISABLED1:       common.ret:
911; NVPTX-DISABLED1-NEXT:    ret void
912; NVPTX-DISABLED1:       user_code.entry:
913; NVPTX-DISABLED1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
914; NVPTX-DISABLED1-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
915; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
916; NVPTX-DISABLED1-NEXT:    call void @__kmpc_target_deinit()
917; NVPTX-DISABLED1-NEXT:    br label [[COMMON_RET]]
918;
919; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
920; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
921; NVPTX-DISABLED2-NEXT:  entry:
922; NVPTX-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
923; NVPTX-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
924; NVPTX-DISABLED2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
925; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
926; NVPTX-DISABLED2-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
927; NVPTX-DISABLED2-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
928; NVPTX-DISABLED2:       is_worker_check:
929; NVPTX-DISABLED2-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
930; NVPTX-DISABLED2-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
931; NVPTX-DISABLED2-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
932; NVPTX-DISABLED2-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
933; NVPTX-DISABLED2-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
934; NVPTX-DISABLED2:       worker_state_machine.begin:
935; NVPTX-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
936; NVPTX-DISABLED2-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
937; NVPTX-DISABLED2-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
938; NVPTX-DISABLED2-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
939; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
940; NVPTX-DISABLED2:       worker_state_machine.finished:
941; NVPTX-DISABLED2-NEXT:    ret void
942; NVPTX-DISABLED2:       worker_state_machine.is_active.check:
943; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
944; NVPTX-DISABLED2:       worker_state_machine.parallel_region.check:
945; NVPTX-DISABLED2-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
946; NVPTX-DISABLED2:       worker_state_machine.parallel_region.execute:
947; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
948; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
949; NVPTX-DISABLED2:       worker_state_machine.parallel_region.check1:
950; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
951; NVPTX-DISABLED2:       worker_state_machine.parallel_region.end:
952; NVPTX-DISABLED2-NEXT:    call void @__kmpc_kernel_end_parallel()
953; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
954; NVPTX-DISABLED2:       worker_state_machine.done.barrier:
955; NVPTX-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
956; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
957; NVPTX-DISABLED2:       thread.user_code.check:
958; NVPTX-DISABLED2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
959; NVPTX-DISABLED2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
960; NVPTX-DISABLED2:       common.ret:
961; NVPTX-DISABLED2-NEXT:    ret void
962; NVPTX-DISABLED2:       user_code.entry:
963; NVPTX-DISABLED2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
964; NVPTX-DISABLED2-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
965; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
966; NVPTX-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
967; NVPTX-DISABLED2-NEXT:    br label [[COMMON_RET]]
968entry:
969  %.zero.addr = alloca i32, align 4
970  %.threadid_temp. = alloca i32, align 4
971  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
972  %exec_user_code = icmp eq i32 %0, -1
973  br i1 %exec_user_code, label %user_code.entry, label %common.ret
974
975common.ret:                                       ; preds = %entry, %user_code.entry
976  ret void
977
978user_code.entry:                                  ; preds = %entry
979  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
980  store i32 0, ptr %.zero.addr, align 4
981  store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
982  call void @__omp_outlined__2(ptr %.threadid_temp., ptr %.zero.addr) #6
983  call void @__kmpc_target_deinit()
984  br label %common.ret
985}
986
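; In __omp_outlined__2 the __kmpc_alloc_shared/__kmpc_free_shared pair for 'x'
; is expected to become a private stack allocation (X_H2S in the checks; on
; AMDGPU an addrspace(5) alloca cast back to a generic pointer before the call
; to @use). In the *-DISABLED runs the wrapper operand of __kmpc_parallel_51
; is rewritten to __omp_outlined__3_wrapper.ID for the custom state machine.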
987; Function Attrs: alwaysinline convergent norecurse nounwind
988define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
989; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
990; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
991; AMDGPU-NEXT:  entry:
992; AMDGPU-NEXT:    [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
993; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
994; AMDGPU-NEXT:    [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
995; AMDGPU-NEXT:    call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
996; AMDGPU-NEXT:    br label [[FOR_COND:%.*]]
997; AMDGPU:       for.cond:
998; AMDGPU-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
999; AMDGPU-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1000; AMDGPU-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1001; AMDGPU:       for.cond.cleanup:
1002; AMDGPU-NEXT:    call void @spmd_amenable() #[[ATTR7]]
1003; AMDGPU-NEXT:    ret void
1004; AMDGPU:       for.body:
1005; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1006; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1007; AMDGPU-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
1008; AMDGPU-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1009;
1010; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
1011; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1012; NVPTX-NEXT:  entry:
1013; NVPTX-NEXT:    [[X_H2S:%.*]] = alloca i8, i64 4, align 4
1014; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1015; NVPTX-NEXT:    call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
1016; NVPTX-NEXT:    br label [[FOR_COND:%.*]]
1017; NVPTX:       for.cond:
1018; NVPTX-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1019; NVPTX-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1020; NVPTX-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1021; NVPTX:       for.cond.cleanup:
1022; NVPTX-NEXT:    call void @spmd_amenable() #[[ATTR7]]
1023; NVPTX-NEXT:    ret void
1024; NVPTX:       for.body:
1025; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1026; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1027; NVPTX-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
1028; NVPTX-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1029;
1030; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
1031; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1032; AMDGPU-DISABLED1-NEXT:  entry:
1033; AMDGPU-DISABLED1-NEXT:    [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
1034; AMDGPU-DISABLED1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1035; AMDGPU-DISABLED1-NEXT:    [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
1036; AMDGPU-DISABLED1-NEXT:    call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
1037; AMDGPU-DISABLED1-NEXT:    br label [[FOR_COND:%.*]]
1038; AMDGPU-DISABLED1:       for.cond:
1039; AMDGPU-DISABLED1-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1040; AMDGPU-DISABLED1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1041; AMDGPU-DISABLED1-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1042; AMDGPU-DISABLED1:       for.cond.cleanup:
1043; AMDGPU-DISABLED1-NEXT:    call void @spmd_amenable() #[[ATTR7]]
1044; AMDGPU-DISABLED1-NEXT:    ret void
1045; AMDGPU-DISABLED1:       for.body:
1046; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1047; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1048; AMDGPU-DISABLED1-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
1049; AMDGPU-DISABLED1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1050;
1051; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
1052; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1053; AMDGPU-DISABLED2-NEXT:  entry:
1054; AMDGPU-DISABLED2-NEXT:    [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
1055; AMDGPU-DISABLED2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1056; AMDGPU-DISABLED2-NEXT:    [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
1057; AMDGPU-DISABLED2-NEXT:    call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
1058; AMDGPU-DISABLED2-NEXT:    br label [[FOR_COND:%.*]]
1059; AMDGPU-DISABLED2:       for.cond:
1060; AMDGPU-DISABLED2-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1061; AMDGPU-DISABLED2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1062; AMDGPU-DISABLED2-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1063; AMDGPU-DISABLED2:       for.cond.cleanup:
1064; AMDGPU-DISABLED2-NEXT:    call void @spmd_amenable() #[[ATTR7]]
1065; AMDGPU-DISABLED2-NEXT:    ret void
1066; AMDGPU-DISABLED2:       for.body:
1067; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1068; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1069; AMDGPU-DISABLED2-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
1070; AMDGPU-DISABLED2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1071;
1072; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
1073; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1074; NVPTX-DISABLED1-NEXT:  entry:
1075; NVPTX-DISABLED1-NEXT:    [[X_H2S:%.*]] = alloca i8, i64 4, align 4
1076; NVPTX-DISABLED1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1077; NVPTX-DISABLED1-NEXT:    call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
1078; NVPTX-DISABLED1-NEXT:    br label [[FOR_COND:%.*]]
1079; NVPTX-DISABLED1:       for.cond:
1080; NVPTX-DISABLED1-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1081; NVPTX-DISABLED1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1082; NVPTX-DISABLED1-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1083; NVPTX-DISABLED1:       for.cond.cleanup:
1084; NVPTX-DISABLED1-NEXT:    call void @spmd_amenable() #[[ATTR7]]
1085; NVPTX-DISABLED1-NEXT:    ret void
1086; NVPTX-DISABLED1:       for.body:
1087; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1088; NVPTX-DISABLED1-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1089; NVPTX-DISABLED1-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
1090; NVPTX-DISABLED1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1091;
1092; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
1093; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1094; NVPTX-DISABLED2-NEXT:  entry:
1095; NVPTX-DISABLED2-NEXT:    [[X_H2S:%.*]] = alloca i8, i64 4, align 4
1096; NVPTX-DISABLED2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1097; NVPTX-DISABLED2-NEXT:    call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
1098; NVPTX-DISABLED2-NEXT:    br label [[FOR_COND:%.*]]
1099; NVPTX-DISABLED2:       for.cond:
1100; NVPTX-DISABLED2-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1101; NVPTX-DISABLED2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1102; NVPTX-DISABLED2-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1103; NVPTX-DISABLED2:       for.cond.cleanup:
1104; NVPTX-DISABLED2-NEXT:    call void @spmd_amenable() #[[ATTR7]]
1105; NVPTX-DISABLED2-NEXT:    ret void
1106; NVPTX-DISABLED2:       for.body:
1107; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1108; NVPTX-DISABLED2-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1109; NVPTX-DISABLED2-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
1110; NVPTX-DISABLED2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1111entry:
1112  %captured_vars_addrs = alloca [0 x ptr], align 8
1113  %x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
1114  call void @use(ptr nocapture %x) #10
1115  br label %for.cond
1116
1117for.cond:                                         ; preds = %for.body, %entry
1118  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
1119  %cmp = icmp slt i32 %i.0, 100
1120  br i1 %cmp, label %for.body, label %for.cond.cleanup
1121
1122for.cond.cleanup:                                 ; preds = %for.cond
1123  call void @spmd_amenable() #10
1124  call void @__kmpc_free_shared(ptr %x, i64 4)
1125  ret void
1126
1127for.body:                                         ; preds = %for.cond
1128  %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
1129  call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs, i64 0)
1130  %inc = add nsw i32 %i.0, 1
1131  br label %for.cond, !llvm.loop !25
1132}
1133; Function Attrs: alwaysinline convergent norecurse nounwind
1134define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
1135; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
1136; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1137; AMDGPU-NEXT:  entry:
1138; AMDGPU-NEXT:    call void @unknown() #[[ATTR8]]
1139; AMDGPU-NEXT:    ret void
1140;
1141; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
1142; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1143; NVPTX-NEXT:  entry:
1144; NVPTX-NEXT:    call void @unknown() #[[ATTR8]]
1145; NVPTX-NEXT:    ret void
1146;
1147; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3
1148; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1149; AMDGPU-DISABLED1-NEXT:  entry:
1150; AMDGPU-DISABLED1-NEXT:    call void @unknown() #[[ATTR8]]
1151; AMDGPU-DISABLED1-NEXT:    ret void
1152;
1153; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3
1154; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1155; AMDGPU-DISABLED2-NEXT:  entry:
1156; AMDGPU-DISABLED2-NEXT:    call void @unknown() #[[ATTR8]]
1157; AMDGPU-DISABLED2-NEXT:    ret void
1158;
1159; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3
1160; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1161; NVPTX-DISABLED1-NEXT:  entry:
1162; NVPTX-DISABLED1-NEXT:    call void @unknown() #[[ATTR8]]
1163; NVPTX-DISABLED1-NEXT:    ret void
1164;
1165; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3
1166; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1167; NVPTX-DISABLED2-NEXT:  entry:
1168; NVPTX-DISABLED2-NEXT:    call void @unknown() #[[ATTR8]]
1169; NVPTX-DISABLED2-NEXT:    ret void
1170entry:
1171  call void @unknown() #11
1172  ret void
1173}
1174
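; The wrapper below only retrieves the shared-variable array via
; __kmpc_get_shared_variables; with no captured variables it simply forwards
; to __omp_outlined__3.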
1175; Function Attrs: convergent norecurse nounwind
1176define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
1177; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1178; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1179; AMDGPU-NEXT:  entry:
1180; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1181; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1182; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1183; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1184; AMDGPU-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1185; AMDGPU-NEXT:    ret void
1186;
1187; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1188; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1189; NVPTX-NEXT:  entry:
1190; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1191; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1192; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1193; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1194; NVPTX-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1195; NVPTX-NEXT:    ret void
1196;
1197; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1198; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1199; AMDGPU-DISABLED1-NEXT:  entry:
1200; AMDGPU-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1201; AMDGPU-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1202; AMDGPU-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1203; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1204; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1205; AMDGPU-DISABLED1-NEXT:    ret void
1206;
1207; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1208; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1209; AMDGPU-DISABLED2-NEXT:  entry:
1210; AMDGPU-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1211; AMDGPU-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1212; AMDGPU-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1213; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1214; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1215; AMDGPU-DISABLED2-NEXT:    ret void
1216;
1217; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1218; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1219; NVPTX-DISABLED1-NEXT:  entry:
1220; NVPTX-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1221; NVPTX-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1222; NVPTX-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1223; NVPTX-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1224; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1225; NVPTX-DISABLED1-NEXT:    ret void
1226;
1227; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1228; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1229; NVPTX-DISABLED2-NEXT:  entry:
1230; NVPTX-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1231; NVPTX-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1232; NVPTX-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1233; NVPTX-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1234; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1235; NVPTX-DISABLED2-NEXT:    ret void
1236entry:
1237  %.addr1 = alloca i32, align 4
1238  %.zero.addr = alloca i32, align 4
1239  %global_args = alloca ptr, align 8
1240  store i32 %1, ptr %.addr1, align 4, !tbaa !18
1241  store i32 0, ptr %.zero.addr, align 4
1242  call void @__kmpc_get_shared_variables(ptr %global_args)
1243  call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #6
1244  ret void
1245}
1246
1247
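; For 'sequential_loop_to_shared_var' the variable 'x' escapes into the
; parallel region, so the checks expect it in the team-shared global @x_shared
; (addrspace(3)), whose address is passed through captured_vars_addrs to
; __kmpc_parallel_51.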
1248; Function Attrs: alwaysinline convergent norecurse nounwind
1249define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 {
1250; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1251; AMDGPU-SAME: () #[[ATTR0]] {
1252; AMDGPU-NEXT:  entry:
1253; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1254; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1255; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1256; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1257; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1258; AMDGPU:       common.ret:
1259; AMDGPU-NEXT:    ret void
1260; AMDGPU:       user_code.entry:
1261; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1262; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1263; AMDGPU-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1264; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
1265; AMDGPU-NEXT:    br label [[COMMON_RET]]
1266;
1267; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1268; NVPTX-SAME: () #[[ATTR0]] {
1269; NVPTX-NEXT:  entry:
1270; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1271; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1272; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1273; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1274; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1275; NVPTX:       common.ret:
1276; NVPTX-NEXT:    ret void
1277; NVPTX:       user_code.entry:
1278; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1279; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1280; NVPTX-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1281; NVPTX-NEXT:    call void @__kmpc_target_deinit()
1282; NVPTX-NEXT:    br label [[COMMON_RET]]
1283;
1284; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1285; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
1286; AMDGPU-DISABLED1-NEXT:  entry:
1287; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1288; AMDGPU-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1289; AMDGPU-DISABLED1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1290; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1291; AMDGPU-DISABLED1-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1292; AMDGPU-DISABLED1-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1293; AMDGPU-DISABLED1:       is_worker_check:
1294; AMDGPU-DISABLED1-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1295; AMDGPU-DISABLED1-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1296; AMDGPU-DISABLED1-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1297; AMDGPU-DISABLED1-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1298; AMDGPU-DISABLED1-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1299; AMDGPU-DISABLED1:       worker_state_machine.begin:
1300; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1301; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1302; AMDGPU-DISABLED1-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1303; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1304; AMDGPU-DISABLED1-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1305; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1306; AMDGPU-DISABLED1:       worker_state_machine.finished:
1307; AMDGPU-DISABLED1-NEXT:    ret void
1308; AMDGPU-DISABLED1:       worker_state_machine.is_active.check:
1309; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1310; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.check:
1311; AMDGPU-DISABLED1-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1312; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.execute:
1313; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1314; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1315; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.check1:
1316; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1317; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.end:
1318; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_kernel_end_parallel()
1319; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1320; AMDGPU-DISABLED1:       worker_state_machine.done.barrier:
1321; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1322; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1323; AMDGPU-DISABLED1:       thread.user_code.check:
1324; AMDGPU-DISABLED1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1325; AMDGPU-DISABLED1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1326; AMDGPU-DISABLED1:       common.ret:
1327; AMDGPU-DISABLED1-NEXT:    ret void
1328; AMDGPU-DISABLED1:       user_code.entry:
1329; AMDGPU-DISABLED1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1330; AMDGPU-DISABLED1-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1331; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1332; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_target_deinit()
1333; AMDGPU-DISABLED1-NEXT:    br label [[COMMON_RET]]
1334;
1335; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1336; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
1337; AMDGPU-DISABLED2-NEXT:  entry:
1338; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1339; AMDGPU-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1340; AMDGPU-DISABLED2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1341; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1342; AMDGPU-DISABLED2-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1343; AMDGPU-DISABLED2-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1344; AMDGPU-DISABLED2:       is_worker_check:
1345; AMDGPU-DISABLED2-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1346; AMDGPU-DISABLED2-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1347; AMDGPU-DISABLED2-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1348; AMDGPU-DISABLED2-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1349; AMDGPU-DISABLED2-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1350; AMDGPU-DISABLED2:       worker_state_machine.begin:
1351; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1352; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1353; AMDGPU-DISABLED2-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1354; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1355; AMDGPU-DISABLED2-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1356; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1357; AMDGPU-DISABLED2:       worker_state_machine.finished:
1358; AMDGPU-DISABLED2-NEXT:    ret void
1359; AMDGPU-DISABLED2:       worker_state_machine.is_active.check:
1360; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1361; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.check:
1362; AMDGPU-DISABLED2-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1363; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.execute:
1364; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1365; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1366; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.check1:
1367; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1368; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.end:
1369; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_kernel_end_parallel()
1370; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1371; AMDGPU-DISABLED2:       worker_state_machine.done.barrier:
1372; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1373; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1374; AMDGPU-DISABLED2:       thread.user_code.check:
1375; AMDGPU-DISABLED2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1376; AMDGPU-DISABLED2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1377; AMDGPU-DISABLED2:       common.ret:
1378; AMDGPU-DISABLED2-NEXT:    ret void
1379; AMDGPU-DISABLED2:       user_code.entry:
1380; AMDGPU-DISABLED2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1381; AMDGPU-DISABLED2-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1382; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1383; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
1384; AMDGPU-DISABLED2-NEXT:    br label [[COMMON_RET]]
1385;
1386; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1387; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
1388; NVPTX-DISABLED1-NEXT:  entry:
1389; NVPTX-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1390; NVPTX-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1391; NVPTX-DISABLED1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1392; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1393; NVPTX-DISABLED1-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1394; NVPTX-DISABLED1-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1395; NVPTX-DISABLED1:       is_worker_check:
1396; NVPTX-DISABLED1-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1397; NVPTX-DISABLED1-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1398; NVPTX-DISABLED1-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1399; NVPTX-DISABLED1-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1400; NVPTX-DISABLED1-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1401; NVPTX-DISABLED1:       worker_state_machine.begin:
1402; NVPTX-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1403; NVPTX-DISABLED1-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1404; NVPTX-DISABLED1-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1405; NVPTX-DISABLED1-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1406; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1407; NVPTX-DISABLED1:       worker_state_machine.finished:
1408; NVPTX-DISABLED1-NEXT:    ret void
1409; NVPTX-DISABLED1:       worker_state_machine.is_active.check:
1410; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1411; NVPTX-DISABLED1:       worker_state_machine.parallel_region.check:
1412; NVPTX-DISABLED1-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1413; NVPTX-DISABLED1:       worker_state_machine.parallel_region.execute:
1414; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1415; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1416; NVPTX-DISABLED1:       worker_state_machine.parallel_region.check1:
1417; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1418; NVPTX-DISABLED1:       worker_state_machine.parallel_region.end:
1419; NVPTX-DISABLED1-NEXT:    call void @__kmpc_kernel_end_parallel()
1420; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1421; NVPTX-DISABLED1:       worker_state_machine.done.barrier:
1422; NVPTX-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1423; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1424; NVPTX-DISABLED1:       thread.user_code.check:
1425; NVPTX-DISABLED1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1426; NVPTX-DISABLED1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1427; NVPTX-DISABLED1:       common.ret:
1428; NVPTX-DISABLED1-NEXT:    ret void
1429; NVPTX-DISABLED1:       user_code.entry:
1430; NVPTX-DISABLED1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1431; NVPTX-DISABLED1-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1432; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1433; NVPTX-DISABLED1-NEXT:    call void @__kmpc_target_deinit()
1434; NVPTX-DISABLED1-NEXT:    br label [[COMMON_RET]]
1435;
1436; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1437; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
1438; NVPTX-DISABLED2-NEXT:  entry:
1439; NVPTX-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1440; NVPTX-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1441; NVPTX-DISABLED2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1442; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1443; NVPTX-DISABLED2-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1444; NVPTX-DISABLED2-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1445; NVPTX-DISABLED2:       is_worker_check:
1446; NVPTX-DISABLED2-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1447; NVPTX-DISABLED2-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1448; NVPTX-DISABLED2-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1449; NVPTX-DISABLED2-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1450; NVPTX-DISABLED2-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1451; NVPTX-DISABLED2:       worker_state_machine.begin:
1452; NVPTX-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1453; NVPTX-DISABLED2-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1454; NVPTX-DISABLED2-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1455; NVPTX-DISABLED2-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1456; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1457; NVPTX-DISABLED2:       worker_state_machine.finished:
1458; NVPTX-DISABLED2-NEXT:    ret void
1459; NVPTX-DISABLED2:       worker_state_machine.is_active.check:
1460; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1461; NVPTX-DISABLED2:       worker_state_machine.parallel_region.check:
1462; NVPTX-DISABLED2-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1463; NVPTX-DISABLED2:       worker_state_machine.parallel_region.execute:
1464; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1465; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1466; NVPTX-DISABLED2:       worker_state_machine.parallel_region.check1:
1467; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1468; NVPTX-DISABLED2:       worker_state_machine.parallel_region.end:
1469; NVPTX-DISABLED2-NEXT:    call void @__kmpc_kernel_end_parallel()
1470; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1471; NVPTX-DISABLED2:       worker_state_machine.done.barrier:
1472; NVPTX-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1473; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1474; NVPTX-DISABLED2:       thread.user_code.check:
1475; NVPTX-DISABLED2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1476; NVPTX-DISABLED2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1477; NVPTX-DISABLED2:       common.ret:
1478; NVPTX-DISABLED2-NEXT:    ret void
1479; NVPTX-DISABLED2:       user_code.entry:
1480; NVPTX-DISABLED2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1481; NVPTX-DISABLED2-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1482; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1483; NVPTX-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
1484; NVPTX-DISABLED2-NEXT:    br label [[COMMON_RET]]
1485entry:
1486  %.zero.addr = alloca i32, align 4
1487  %.threadid_temp. = alloca i32, align 4
1488  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1489  %exec_user_code = icmp eq i32 %0, -1
1490  br i1 %exec_user_code, label %user_code.entry, label %common.ret
1491
1492common.ret:                                       ; preds = %entry, %user_code.entry
1493  ret void
1494
1495user_code.entry:                                  ; preds = %entry
1496  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
1497  store i32 0, ptr %.zero.addr, align 4
1498  store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
1499  call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #6
1500  call void @__kmpc_target_deinit()
1501  br label %common.ret
1502}
1503
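; Target region body for sequential_loop_to_shared_var: in the input IR %x is
; allocated with @__kmpc_alloc_shared and freed after the loop; in every check
; prefix the allocation has been turned into the addrspace(3) global @x_shared
; whose (addrspacecast) pointer is passed to the parallel region.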
1504; Function Attrs: alwaysinline convergent norecurse nounwind
1505define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
1506; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
1507; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1508; AMDGPU-NEXT:  entry:
1509; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
1510; AMDGPU-NEXT:    br label [[FOR_COND:%.*]]
1511; AMDGPU:       for.cond:
1512; AMDGPU-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1513; AMDGPU-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1514; AMDGPU-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1515; AMDGPU:       for.cond.cleanup:
1516; AMDGPU-NEXT:    call void @spmd_amenable() #[[ATTR7]]
1517; AMDGPU-NEXT:    ret void
1518; AMDGPU:       for.body:
1519; AMDGPU-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
1520; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1521; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
1522; AMDGPU-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
1523; AMDGPU-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
1524;
1525; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
1526; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1527; NVPTX-NEXT:  entry:
1528; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
1529; NVPTX-NEXT:    br label [[FOR_COND:%.*]]
1530; NVPTX:       for.cond:
1531; NVPTX-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1532; NVPTX-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1533; NVPTX-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1534; NVPTX:       for.cond.cleanup:
1535; NVPTX-NEXT:    call void @spmd_amenable() #[[ATTR7]]
1536; NVPTX-NEXT:    ret void
1537; NVPTX:       for.body:
1538; NVPTX-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
1539; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1540; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
1541; NVPTX-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
1542; NVPTX-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
1543;
1544; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
1545; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1546; AMDGPU-DISABLED1-NEXT:  entry:
1547; AMDGPU-DISABLED1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
1548; AMDGPU-DISABLED1-NEXT:    br label [[FOR_COND:%.*]]
1549; AMDGPU-DISABLED1:       for.cond:
1550; AMDGPU-DISABLED1-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1551; AMDGPU-DISABLED1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1552; AMDGPU-DISABLED1-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1553; AMDGPU-DISABLED1:       for.cond.cleanup:
1554; AMDGPU-DISABLED1-NEXT:    call void @spmd_amenable() #[[ATTR7]]
1555; AMDGPU-DISABLED1-NEXT:    ret void
1556; AMDGPU-DISABLED1:       for.body:
1557; AMDGPU-DISABLED1-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
1558; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1559; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
1560; AMDGPU-DISABLED1-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
1561; AMDGPU-DISABLED1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
1562;
1563; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
1564; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1565; AMDGPU-DISABLED2-NEXT:  entry:
1566; AMDGPU-DISABLED2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
1567; AMDGPU-DISABLED2-NEXT:    br label [[FOR_COND:%.*]]
1568; AMDGPU-DISABLED2:       for.cond:
1569; AMDGPU-DISABLED2-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1570; AMDGPU-DISABLED2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1571; AMDGPU-DISABLED2-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1572; AMDGPU-DISABLED2:       for.cond.cleanup:
1573; AMDGPU-DISABLED2-NEXT:    call void @spmd_amenable() #[[ATTR7]]
1574; AMDGPU-DISABLED2-NEXT:    ret void
1575; AMDGPU-DISABLED2:       for.body:
1576; AMDGPU-DISABLED2-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
1577; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1578; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
1579; AMDGPU-DISABLED2-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
1580; AMDGPU-DISABLED2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
1581;
1582; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
1583; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1584; NVPTX-DISABLED1-NEXT:  entry:
1585; NVPTX-DISABLED1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
1586; NVPTX-DISABLED1-NEXT:    br label [[FOR_COND:%.*]]
1587; NVPTX-DISABLED1:       for.cond:
1588; NVPTX-DISABLED1-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1589; NVPTX-DISABLED1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1590; NVPTX-DISABLED1-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1591; NVPTX-DISABLED1:       for.cond.cleanup:
1592; NVPTX-DISABLED1-NEXT:    call void @spmd_amenable() #[[ATTR7]]
1593; NVPTX-DISABLED1-NEXT:    ret void
1594; NVPTX-DISABLED1:       for.body:
1595; NVPTX-DISABLED1-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
1596; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1597; NVPTX-DISABLED1-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
1598; NVPTX-DISABLED1-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
1599; NVPTX-DISABLED1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
1600;
1601; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
1602; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1603; NVPTX-DISABLED2-NEXT:  entry:
1604; NVPTX-DISABLED2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
1605; NVPTX-DISABLED2-NEXT:    br label [[FOR_COND:%.*]]
1606; NVPTX-DISABLED2:       for.cond:
1607; NVPTX-DISABLED2-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1608; NVPTX-DISABLED2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1609; NVPTX-DISABLED2-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1610; NVPTX-DISABLED2:       for.cond.cleanup:
1611; NVPTX-DISABLED2-NEXT:    call void @spmd_amenable() #[[ATTR7]]
1612; NVPTX-DISABLED2-NEXT:    ret void
1613; NVPTX-DISABLED2:       for.body:
1614; NVPTX-DISABLED2-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
1615; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1616; NVPTX-DISABLED2-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
1617; NVPTX-DISABLED2-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
1618; NVPTX-DISABLED2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
1619entry:
1620  %captured_vars_addrs = alloca [1 x ptr], align 8
1621  %x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
1622  br label %for.cond
1623
1624for.cond:                                         ; preds = %for.body, %entry
1625  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
1626  %cmp = icmp slt i32 %i.0, 100
1627  br i1 %cmp, label %for.body, label %for.cond.cleanup
1628
1629for.cond.cleanup:                                 ; preds = %for.cond
1630  call void @spmd_amenable() #10
1631  call void @__kmpc_free_shared(ptr %x, i64 4)
1632  ret void
1633
1634for.body:                                         ; preds = %for.cond
1635  store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26
1636  %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
1637  call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 1)
1638  %inc = add nsw i32 %i.0, 1
1639  br label %for.cond, !llvm.loop !28
1640}
1641
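; Parallel region body: increments the shared variable %x and calls @unknown.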
1642; Function Attrs: alwaysinline convergent norecurse nounwind
1643define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
1644; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
1645; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
1646; AMDGPU-NEXT:  entry:
1647; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
1648; AMDGPU-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1649; AMDGPU-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
1650; AMDGPU-NEXT:    call void @unknown() #[[ATTR8]]
1651; AMDGPU-NEXT:    ret void
1652;
1653; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
1654; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
1655; NVPTX-NEXT:  entry:
1656; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
1657; NVPTX-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1658; NVPTX-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
1659; NVPTX-NEXT:    call void @unknown() #[[ATTR8]]
1660; NVPTX-NEXT:    ret void
1661;
1662; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
1663; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
1664; AMDGPU-DISABLED1-NEXT:  entry:
1665; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
1666; AMDGPU-DISABLED1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1667; AMDGPU-DISABLED1-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
1668; AMDGPU-DISABLED1-NEXT:    call void @unknown() #[[ATTR8]]
1669; AMDGPU-DISABLED1-NEXT:    ret void
1670;
1671; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
1672; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
1673; AMDGPU-DISABLED2-NEXT:  entry:
1674; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
1675; AMDGPU-DISABLED2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1676; AMDGPU-DISABLED2-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
1677; AMDGPU-DISABLED2-NEXT:    call void @unknown() #[[ATTR8]]
1678; AMDGPU-DISABLED2-NEXT:    ret void
1679;
1680; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
1681; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
1682; NVPTX-DISABLED1-NEXT:  entry:
1683; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
1684; NVPTX-DISABLED1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1685; NVPTX-DISABLED1-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
1686; NVPTX-DISABLED1-NEXT:    call void @unknown() #[[ATTR8]]
1687; NVPTX-DISABLED1-NEXT:    ret void
1688;
1689; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
1690; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
1691; NVPTX-DISABLED2-NEXT:  entry:
1692; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
1693; NVPTX-DISABLED2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1694; NVPTX-DISABLED2-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
1695; NVPTX-DISABLED2-NEXT:    call void @unknown() #[[ATTR8]]
1696; NVPTX-DISABLED2-NEXT:    ret void
1697entry:
1698  %0 = load i32, ptr %x, align 4, !tbaa !18
1699  %inc = add nsw i32 %0, 1
1700  store i32 %inc, ptr %x, align 4, !tbaa !18
1701  call void @unknown() #11
1702  ret void
1703}
1704
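; Wrapper executed by worker threads: retrieves the captured %x pointer via
; @__kmpc_get_shared_variables and forwards it to @__omp_outlined__5.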
1705; Function Attrs: convergent norecurse nounwind
1706define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
1707; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1708; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1709; AMDGPU-NEXT:  entry:
1710; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1711; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1712; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1713; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1714; AMDGPU-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
1715; AMDGPU-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
1716; AMDGPU-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
1717; AMDGPU-NEXT:    ret void
1718;
1719; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1720; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1721; NVPTX-NEXT:  entry:
1722; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1723; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1724; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1725; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1726; NVPTX-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
1727; NVPTX-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
1728; NVPTX-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
1729; NVPTX-NEXT:    ret void
1730;
1731; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1732; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1733; AMDGPU-DISABLED1-NEXT:  entry:
1734; AMDGPU-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1735; AMDGPU-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1736; AMDGPU-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1737; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1738; AMDGPU-DISABLED1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
1739; AMDGPU-DISABLED1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
1740; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
1741; AMDGPU-DISABLED1-NEXT:    ret void
1742;
1743; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1744; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1745; AMDGPU-DISABLED2-NEXT:  entry:
1746; AMDGPU-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1747; AMDGPU-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1748; AMDGPU-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1749; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1750; AMDGPU-DISABLED2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
1751; AMDGPU-DISABLED2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
1752; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
1753; AMDGPU-DISABLED2-NEXT:    ret void
1754;
1755; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1756; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1757; NVPTX-DISABLED1-NEXT:  entry:
1758; NVPTX-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1759; NVPTX-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1760; NVPTX-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1761; NVPTX-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1762; NVPTX-DISABLED1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
1763; NVPTX-DISABLED1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
1764; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
1765; NVPTX-DISABLED1-NEXT:    ret void
1766;
1767; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1768; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1769; NVPTX-DISABLED2-NEXT:  entry:
1770; NVPTX-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
1771; NVPTX-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1772; NVPTX-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1773; NVPTX-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1774; NVPTX-DISABLED2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
1775; NVPTX-DISABLED2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
1776; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
1777; NVPTX-DISABLED2-NEXT:    ret void
1778entry:
1779  %.addr1 = alloca i32, align 4
1780  %.zero.addr = alloca i32, align 4
1781  %global_args = alloca ptr, align 8
1782  store i32 %1, ptr %.addr1, align 4, !tbaa !18
1783  store i32 0, ptr %.zero.addr, align 4
1784  call void @__kmpc_get_shared_variables(ptr %global_args)
1785  %2 = load ptr, ptr %global_args, align 8
1786  %3 = load ptr, ptr %2, align 8, !tbaa !26
1787  call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr, ptr %3) #6
1788  ret void
1789}
1790
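; Kernel for sequential_loop_to_shared_var_guarded: SPMDized in the AMDGPU and
; NVPTX runs, while the DISABLED runs keep the generic-mode worker state
; machine that dispatches to @__omp_outlined__7_wrapper.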
1791; Function Attrs: alwaysinline convergent norecurse nounwind
1792define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 {
1793; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
1794; AMDGPU-SAME: () #[[ATTR0]] {
1795; AMDGPU-NEXT:  entry:
1796; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1797; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1798; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
1799; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1800; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1801; AMDGPU:       common.ret:
1802; AMDGPU-NEXT:    ret void
1803; AMDGPU:       user_code.entry:
1804; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1805; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1806; AMDGPU-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1807; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
1808; AMDGPU-NEXT:    br label [[COMMON_RET]]
1809;
1810; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
1811; NVPTX-SAME: () #[[ATTR0]] {
1812; NVPTX-NEXT:  entry:
1813; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1814; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1815; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
1816; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1817; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1818; NVPTX:       common.ret:
1819; NVPTX-NEXT:    ret void
1820; NVPTX:       user_code.entry:
1821; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1822; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1823; NVPTX-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1824; NVPTX-NEXT:    call void @__kmpc_target_deinit()
1825; NVPTX-NEXT:    br label [[COMMON_RET]]
1826;
1827; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
1828; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
1829; AMDGPU-DISABLED1-NEXT:  entry:
1830; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1831; AMDGPU-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1832; AMDGPU-DISABLED1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1833; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
1834; AMDGPU-DISABLED1-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1835; AMDGPU-DISABLED1-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1836; AMDGPU-DISABLED1:       is_worker_check:
1837; AMDGPU-DISABLED1-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1838; AMDGPU-DISABLED1-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1839; AMDGPU-DISABLED1-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1840; AMDGPU-DISABLED1-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1841; AMDGPU-DISABLED1-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1842; AMDGPU-DISABLED1:       worker_state_machine.begin:
1843; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1844; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1845; AMDGPU-DISABLED1-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1846; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1847; AMDGPU-DISABLED1-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1848; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1849; AMDGPU-DISABLED1:       worker_state_machine.finished:
1850; AMDGPU-DISABLED1-NEXT:    ret void
1851; AMDGPU-DISABLED1:       worker_state_machine.is_active.check:
1852; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1853; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.check:
1854; AMDGPU-DISABLED1-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1855; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.execute:
1856; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
1857; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1858; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.check1:
1859; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1860; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.end:
1861; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_kernel_end_parallel()
1862; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1863; AMDGPU-DISABLED1:       worker_state_machine.done.barrier:
1864; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1865; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1866; AMDGPU-DISABLED1:       thread.user_code.check:
1867; AMDGPU-DISABLED1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1868; AMDGPU-DISABLED1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1869; AMDGPU-DISABLED1:       common.ret:
1870; AMDGPU-DISABLED1-NEXT:    ret void
1871; AMDGPU-DISABLED1:       user_code.entry:
1872; AMDGPU-DISABLED1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1873; AMDGPU-DISABLED1-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1874; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1875; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_target_deinit()
1876; AMDGPU-DISABLED1-NEXT:    br label [[COMMON_RET]]
1877;
1878; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
1879; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
1880; AMDGPU-DISABLED2-NEXT:  entry:
1881; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1882; AMDGPU-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1883; AMDGPU-DISABLED2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1884; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
1885; AMDGPU-DISABLED2-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1886; AMDGPU-DISABLED2-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1887; AMDGPU-DISABLED2:       is_worker_check:
1888; AMDGPU-DISABLED2-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1889; AMDGPU-DISABLED2-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1890; AMDGPU-DISABLED2-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1891; AMDGPU-DISABLED2-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1892; AMDGPU-DISABLED2-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1893; AMDGPU-DISABLED2:       worker_state_machine.begin:
1894; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1895; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1896; AMDGPU-DISABLED2-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1897; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1898; AMDGPU-DISABLED2-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1899; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1900; AMDGPU-DISABLED2:       worker_state_machine.finished:
1901; AMDGPU-DISABLED2-NEXT:    ret void
1902; AMDGPU-DISABLED2:       worker_state_machine.is_active.check:
1903; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1904; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.check:
1905; AMDGPU-DISABLED2-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1906; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.execute:
1907; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
1908; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1909; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.check1:
1910; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1911; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.end:
1912; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_kernel_end_parallel()
1913; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1914; AMDGPU-DISABLED2:       worker_state_machine.done.barrier:
1915; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1916; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1917; AMDGPU-DISABLED2:       thread.user_code.check:
1918; AMDGPU-DISABLED2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1919; AMDGPU-DISABLED2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1920; AMDGPU-DISABLED2:       common.ret:
1921; AMDGPU-DISABLED2-NEXT:    ret void
1922; AMDGPU-DISABLED2:       user_code.entry:
1923; AMDGPU-DISABLED2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1924; AMDGPU-DISABLED2-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1925; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1926; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
1927; AMDGPU-DISABLED2-NEXT:    br label [[COMMON_RET]]
1928;
1929; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
1930; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
1931; NVPTX-DISABLED1-NEXT:  entry:
1932; NVPTX-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1933; NVPTX-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1934; NVPTX-DISABLED1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1935; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
1936; NVPTX-DISABLED1-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1937; NVPTX-DISABLED1-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1938; NVPTX-DISABLED1:       is_worker_check:
1939; NVPTX-DISABLED1-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1940; NVPTX-DISABLED1-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1941; NVPTX-DISABLED1-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1942; NVPTX-DISABLED1-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1943; NVPTX-DISABLED1-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1944; NVPTX-DISABLED1:       worker_state_machine.begin:
1945; NVPTX-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1946; NVPTX-DISABLED1-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1947; NVPTX-DISABLED1-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1948; NVPTX-DISABLED1-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1949; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1950; NVPTX-DISABLED1:       worker_state_machine.finished:
1951; NVPTX-DISABLED1-NEXT:    ret void
1952; NVPTX-DISABLED1:       worker_state_machine.is_active.check:
1953; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1954; NVPTX-DISABLED1:       worker_state_machine.parallel_region.check:
1955; NVPTX-DISABLED1-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1956; NVPTX-DISABLED1:       worker_state_machine.parallel_region.execute:
1957; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
1958; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1959; NVPTX-DISABLED1:       worker_state_machine.parallel_region.check1:
1960; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1961; NVPTX-DISABLED1:       worker_state_machine.parallel_region.end:
1962; NVPTX-DISABLED1-NEXT:    call void @__kmpc_kernel_end_parallel()
1963; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1964; NVPTX-DISABLED1:       worker_state_machine.done.barrier:
1965; NVPTX-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1966; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
1967; NVPTX-DISABLED1:       thread.user_code.check:
1968; NVPTX-DISABLED1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1969; NVPTX-DISABLED1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1970; NVPTX-DISABLED1:       common.ret:
1971; NVPTX-DISABLED1-NEXT:    ret void
1972; NVPTX-DISABLED1:       user_code.entry:
1973; NVPTX-DISABLED1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1974; NVPTX-DISABLED1-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1975; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1976; NVPTX-DISABLED1-NEXT:    call void @__kmpc_target_deinit()
1977; NVPTX-DISABLED1-NEXT:    br label [[COMMON_RET]]
1978;
1979; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
1980; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
1981; NVPTX-DISABLED2-NEXT:  entry:
1982; NVPTX-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1983; NVPTX-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1984; NVPTX-DISABLED2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1985; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
1986; NVPTX-DISABLED2-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1987; NVPTX-DISABLED2-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1988; NVPTX-DISABLED2:       is_worker_check:
1989; NVPTX-DISABLED2-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1990; NVPTX-DISABLED2-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1991; NVPTX-DISABLED2-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1992; NVPTX-DISABLED2-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1993; NVPTX-DISABLED2-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1994; NVPTX-DISABLED2:       worker_state_machine.begin:
1995; NVPTX-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1996; NVPTX-DISABLED2-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1997; NVPTX-DISABLED2-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1998; NVPTX-DISABLED2-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1999; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2000; NVPTX-DISABLED2:       worker_state_machine.finished:
2001; NVPTX-DISABLED2-NEXT:    ret void
2002; NVPTX-DISABLED2:       worker_state_machine.is_active.check:
2003; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2004; NVPTX-DISABLED2:       worker_state_machine.parallel_region.check:
2005; NVPTX-DISABLED2-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
2006; NVPTX-DISABLED2:       worker_state_machine.parallel_region.execute:
2007; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
2008; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2009; NVPTX-DISABLED2:       worker_state_machine.parallel_region.check1:
2010; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2011; NVPTX-DISABLED2:       worker_state_machine.parallel_region.end:
2012; NVPTX-DISABLED2-NEXT:    call void @__kmpc_kernel_end_parallel()
2013; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2014; NVPTX-DISABLED2:       worker_state_machine.done.barrier:
2015; NVPTX-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2016; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2017; NVPTX-DISABLED2:       thread.user_code.check:
2018; NVPTX-DISABLED2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2019; NVPTX-DISABLED2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2020; NVPTX-DISABLED2:       common.ret:
2021; NVPTX-DISABLED2-NEXT:    ret void
2022; NVPTX-DISABLED2:       user_code.entry:
2023; NVPTX-DISABLED2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2024; NVPTX-DISABLED2-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
2025; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2026; NVPTX-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
2027; NVPTX-DISABLED2-NEXT:    br label [[COMMON_RET]]
2028entry:
2029  %.zero.addr = alloca i32, align 4
2030  %.threadid_temp. = alloca i32, align 4
2031  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
2032  %exec_user_code = icmp eq i32 %0, -1
2033  br i1 %exec_user_code, label %user_code.entry, label %common.ret
2034
2035common.ret:                                       ; preds = %entry, %user_code.entry
2036  ret void
2037
2038user_code.entry:                                  ; preds = %entry
2039  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
2040  store i32 0, ptr %.zero.addr, align 4
2041  store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
2042  call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #6
2043  call void @__kmpc_target_deinit()
2044  br label %common.ret
2045}
2046
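; Team region body: initializes the shared %x to 42 before the loop. In the
; SPMD runs the store is guarded by a thread-id check plus a barrier
; (region.check.tid / region.guarded / region.barrier); the DISABLED runs
; store to the shared global directly.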
2047; Function Attrs: alwaysinline convergent norecurse nounwind
2048define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
2049; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
2050; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2051; AMDGPU-NEXT:  entry:
2052; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2053; AMDGPU-NEXT:    br label [[REGION_CHECK_TID:%.*]]
2054; AMDGPU:       region.check.tid:
2055; AMDGPU-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
2056; AMDGPU-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
2057; AMDGPU-NEXT:    br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
2058; AMDGPU:       region.guarded:
2059; AMDGPU-NEXT:    store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
2060; AMDGPU-NEXT:    br label [[REGION_GUARDED_END:%.*]]
2061; AMDGPU:       region.guarded.end:
2062; AMDGPU-NEXT:    br label [[REGION_BARRIER]]
2063; AMDGPU:       region.barrier:
2064; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
2065; AMDGPU-NEXT:    br label [[REGION_EXIT:%.*]]
2066; AMDGPU:       region.exit:
2067; AMDGPU-NEXT:    br label [[FOR_COND:%.*]]
2068; AMDGPU:       for.cond:
2069; AMDGPU-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2070; AMDGPU-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2071; AMDGPU-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2072; AMDGPU:       for.cond.cleanup:
2073; AMDGPU-NEXT:    call void @spmd_amenable() #[[ATTR7]]
2074; AMDGPU-NEXT:    ret void
2075; AMDGPU:       for.body:
2076; AMDGPU-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
2077; AMDGPU-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
2078; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2079; AMDGPU-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
2080; AMDGPU-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2081;
2082; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
2083; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2084; NVPTX-NEXT:  entry:
2085; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2086; NVPTX-NEXT:    br label [[REGION_CHECK_TID:%.*]]
2087; NVPTX:       region.check.tid:
2088; NVPTX-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
2089; NVPTX-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
2090; NVPTX-NEXT:    br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
2091; NVPTX:       region.guarded:
2092; NVPTX-NEXT:    store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
2093; NVPTX-NEXT:    br label [[REGION_GUARDED_END:%.*]]
2094; NVPTX:       region.guarded.end:
2095; NVPTX-NEXT:    br label [[REGION_BARRIER]]
2096; NVPTX:       region.barrier:
2097; NVPTX-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
2098; NVPTX-NEXT:    br label [[REGION_EXIT:%.*]]
2099; NVPTX:       region.exit:
2100; NVPTX-NEXT:    br label [[FOR_COND:%.*]]
2101; NVPTX:       for.cond:
2102; NVPTX-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2103; NVPTX-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2104; NVPTX-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2105; NVPTX:       for.cond.cleanup:
2106; NVPTX-NEXT:    call void @spmd_amenable() #[[ATTR7]]
2107; NVPTX-NEXT:    ret void
2108; NVPTX:       for.body:
2109; NVPTX-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
2110; NVPTX-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
2111; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2112; NVPTX-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
2113; NVPTX-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2114;
2115; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
2116; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2117; AMDGPU-DISABLED1-NEXT:  entry:
2118; AMDGPU-DISABLED1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2119; AMDGPU-DISABLED1-NEXT:    store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
2120; AMDGPU-DISABLED1-NEXT:    br label [[FOR_COND:%.*]]
2121; AMDGPU-DISABLED1:       for.cond:
2122; AMDGPU-DISABLED1-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2123; AMDGPU-DISABLED1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2124; AMDGPU-DISABLED1-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2125; AMDGPU-DISABLED1:       for.cond.cleanup:
2126; AMDGPU-DISABLED1-NEXT:    call void @spmd_amenable() #[[ATTR7]]
2127; AMDGPU-DISABLED1-NEXT:    ret void
2128; AMDGPU-DISABLED1:       for.body:
2129; AMDGPU-DISABLED1-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
2130; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
2131; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2132; AMDGPU-DISABLED1-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
2133; AMDGPU-DISABLED1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2134;
2135; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
2136; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2137; AMDGPU-DISABLED2-NEXT:  entry:
2138; AMDGPU-DISABLED2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2139; AMDGPU-DISABLED2-NEXT:    store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
2140; AMDGPU-DISABLED2-NEXT:    br label [[FOR_COND:%.*]]
2141; AMDGPU-DISABLED2:       for.cond:
2142; AMDGPU-DISABLED2-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2143; AMDGPU-DISABLED2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2144; AMDGPU-DISABLED2-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2145; AMDGPU-DISABLED2:       for.cond.cleanup:
2146; AMDGPU-DISABLED2-NEXT:    call void @spmd_amenable() #[[ATTR7]]
2147; AMDGPU-DISABLED2-NEXT:    ret void
2148; AMDGPU-DISABLED2:       for.body:
2149; AMDGPU-DISABLED2-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
2150; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
2151; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2152; AMDGPU-DISABLED2-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
2153; AMDGPU-DISABLED2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2154;
2155; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
2156; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2157; NVPTX-DISABLED1-NEXT:  entry:
2158; NVPTX-DISABLED1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2159; NVPTX-DISABLED1-NEXT:    store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
2160; NVPTX-DISABLED1-NEXT:    br label [[FOR_COND:%.*]]
2161; NVPTX-DISABLED1:       for.cond:
2162; NVPTX-DISABLED1-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2163; NVPTX-DISABLED1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2164; NVPTX-DISABLED1-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2165; NVPTX-DISABLED1:       for.cond.cleanup:
2166; NVPTX-DISABLED1-NEXT:    call void @spmd_amenable() #[[ATTR7]]
2167; NVPTX-DISABLED1-NEXT:    ret void
2168; NVPTX-DISABLED1:       for.body:
2169; NVPTX-DISABLED1-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
2170; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
2171; NVPTX-DISABLED1-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2172; NVPTX-DISABLED1-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
2173; NVPTX-DISABLED1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2174;
2175; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
2176; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2177; NVPTX-DISABLED2-NEXT:  entry:
2178; NVPTX-DISABLED2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2179; NVPTX-DISABLED2-NEXT:    store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
2180; NVPTX-DISABLED2-NEXT:    br label [[FOR_COND:%.*]]
2181; NVPTX-DISABLED2:       for.cond:
2182; NVPTX-DISABLED2-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2183; NVPTX-DISABLED2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2184; NVPTX-DISABLED2-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2185; NVPTX-DISABLED2:       for.cond.cleanup:
2186; NVPTX-DISABLED2-NEXT:    call void @spmd_amenable() #[[ATTR7]]
2187; NVPTX-DISABLED2-NEXT:    ret void
2188; NVPTX-DISABLED2:       for.body:
2189; NVPTX-DISABLED2-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
2190; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
2191; NVPTX-DISABLED2-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2192; NVPTX-DISABLED2-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
2193; NVPTX-DISABLED2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2194entry:
2195  %captured_vars_addrs = alloca [1 x ptr], align 8
2196  %x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
2197  store i32 42, ptr %x, align 4, !tbaa !18
2198  br label %for.cond
2199
2200for.cond:                                         ; preds = %for.body, %entry
2201  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
2202  %cmp = icmp slt i32 %i.0, 100
2203  br i1 %cmp, label %for.body, label %for.cond.cleanup
2204
2205for.cond.cleanup:                                 ; preds = %for.cond
2206  call void @spmd_amenable() #10
2207  call void @__kmpc_free_shared(ptr %x, i64 4)
2208  ret void
2209
2210for.body:                                         ; preds = %for.cond
2211  store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26
2212  %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
2213  call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 1)
2214  %inc = add nsw i32 %i.0, 1
2215  br label %for.cond, !llvm.loop !29
2216}
2217
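; Parallel region body: increments %x and passes its address to @unknowni32p.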
2218; Function Attrs: alwaysinline convergent norecurse nounwind
2219define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
2220; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
2221; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2222; AMDGPU-NEXT:  entry:
2223; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
2224; AMDGPU-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2225; AMDGPU-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
2226; AMDGPU-NEXT:    call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2227; AMDGPU-NEXT:    ret void
2228;
2229; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
2230; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2231; NVPTX-NEXT:  entry:
2232; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
2233; NVPTX-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2234; NVPTX-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
2235; NVPTX-NEXT:    call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2236; NVPTX-NEXT:    ret void
2237;
2238; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
2239; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2240; AMDGPU-DISABLED1-NEXT:  entry:
2241; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
2242; AMDGPU-DISABLED1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2243; AMDGPU-DISABLED1-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
2244; AMDGPU-DISABLED1-NEXT:    call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2245; AMDGPU-DISABLED1-NEXT:    ret void
2246;
2247; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
2248; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2249; AMDGPU-DISABLED2-NEXT:  entry:
2250; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
2251; AMDGPU-DISABLED2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2252; AMDGPU-DISABLED2-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
2253; AMDGPU-DISABLED2-NEXT:    call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2254; AMDGPU-DISABLED2-NEXT:    ret void
2255;
2256; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
2257; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2258; NVPTX-DISABLED1-NEXT:  entry:
2259; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
2260; NVPTX-DISABLED1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2261; NVPTX-DISABLED1-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
2262; NVPTX-DISABLED1-NEXT:    call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2263; NVPTX-DISABLED1-NEXT:    ret void
2264;
2265; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
2266; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2267; NVPTX-DISABLED2-NEXT:  entry:
2268; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
2269; NVPTX-DISABLED2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2270; NVPTX-DISABLED2-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
2271; NVPTX-DISABLED2-NEXT:    call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2272; NVPTX-DISABLED2-NEXT:    ret void
2273entry:
2274  %0 = load i32, ptr %x, align 4, !tbaa !18
2275  %inc = add nsw i32 %0, 1
2276  store i32 %inc, ptr %x, align 4, !tbaa !18
2277  call void @unknowni32p(ptr %x) #11
2278  ret void
2279}
2280
2281; Function Attrs: convergent norecurse nounwind
2282define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
2283; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2284; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2285; AMDGPU-NEXT:  entry:
2286; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2287; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2288; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2289; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2290; AMDGPU-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2291; AMDGPU-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
2292; AMDGPU-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2293; AMDGPU-NEXT:    ret void
2294;
2295; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2296; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2297; NVPTX-NEXT:  entry:
2298; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2299; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2300; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2301; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2302; NVPTX-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2303; NVPTX-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
2304; NVPTX-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2305; NVPTX-NEXT:    ret void
2306;
2307; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2308; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2309; AMDGPU-DISABLED1-NEXT:  entry:
2310; AMDGPU-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2311; AMDGPU-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2312; AMDGPU-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2313; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2314; AMDGPU-DISABLED1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2315; AMDGPU-DISABLED1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
2316; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2317; AMDGPU-DISABLED1-NEXT:    ret void
2318;
2319; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2320; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2321; AMDGPU-DISABLED2-NEXT:  entry:
2322; AMDGPU-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2323; AMDGPU-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2324; AMDGPU-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2325; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2326; AMDGPU-DISABLED2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2327; AMDGPU-DISABLED2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
2328; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2329; AMDGPU-DISABLED2-NEXT:    ret void
2330;
2331; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2332; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2333; NVPTX-DISABLED1-NEXT:  entry:
2334; NVPTX-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2335; NVPTX-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2336; NVPTX-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2337; NVPTX-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2338; NVPTX-DISABLED1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2339; NVPTX-DISABLED1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
2340; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2341; NVPTX-DISABLED1-NEXT:    ret void
2342;
2343; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2344; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2345; NVPTX-DISABLED2-NEXT:  entry:
2346; NVPTX-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
2347; NVPTX-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2348; NVPTX-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2349; NVPTX-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2350; NVPTX-DISABLED2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2351; NVPTX-DISABLED2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
2352; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2353; NVPTX-DISABLED2-NEXT:    ret void
2354entry:
2355  %.addr1 = alloca i32, align 4
2356  %.zero.addr = alloca i32, align 4
2357  %global_args = alloca ptr, align 8
2358  store i32 %1, ptr %.addr1, align 4, !tbaa !18
2359  store i32 0, ptr %.zero.addr, align 4
2360  call void @__kmpc_get_shared_variables(ptr %global_args)
2361  %2 = load ptr, ptr %global_args, align 8
2362  %3 = load ptr, ptr %2, align 8, !tbaa !26
2363  call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr, ptr %3) #6
2364  ret void
2365}
2366
2367; Function Attrs: alwaysinline convergent norecurse nounwind
2368define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 {
2369; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
2370; AMDGPU-SAME: () #[[ATTR0]] {
2371; AMDGPU-NEXT:  entry:
2372; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2373; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2374; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2375; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2376; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2377; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2378; AMDGPU:       is_worker_check:
2379; AMDGPU-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2380; AMDGPU-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2381; AMDGPU-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2382; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2383; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2384; AMDGPU:       worker_state_machine.begin:
2385; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2386; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2387; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2388; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2389; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2390; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2391; AMDGPU:       worker_state_machine.finished:
2392; AMDGPU-NEXT:    ret void
2393; AMDGPU:       worker_state_machine.is_active.check:
2394; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2395; AMDGPU:       worker_state_machine.parallel_region.fallback.execute:
2396; AMDGPU-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2397; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2398; AMDGPU:       worker_state_machine.parallel_region.end:
2399; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
2400; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2401; AMDGPU:       worker_state_machine.done.barrier:
2402; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2403; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2404; AMDGPU:       thread.user_code.check:
2405; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2406; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2407; AMDGPU:       common.ret:
2408; AMDGPU-NEXT:    ret void
2409; AMDGPU:       user_code.entry:
2410; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2411; AMDGPU-NEXT:    call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2412; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
2413; AMDGPU-NEXT:    br label [[COMMON_RET]]
2414;
2415; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
2416; NVPTX-SAME: () #[[ATTR0]] {
2417; NVPTX-NEXT:  entry:
2418; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2419; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2420; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2421; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2422; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2423; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2424; NVPTX:       is_worker_check:
2425; NVPTX-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2426; NVPTX-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2427; NVPTX-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2428; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2429; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2430; NVPTX:       worker_state_machine.begin:
2431; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2432; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2433; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2434; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2435; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2436; NVPTX:       worker_state_machine.finished:
2437; NVPTX-NEXT:    ret void
2438; NVPTX:       worker_state_machine.is_active.check:
2439; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2440; NVPTX:       worker_state_machine.parallel_region.fallback.execute:
2441; NVPTX-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2442; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2443; NVPTX:       worker_state_machine.parallel_region.end:
2444; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
2445; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2446; NVPTX:       worker_state_machine.done.barrier:
2447; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2448; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2449; NVPTX:       thread.user_code.check:
2450; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2451; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2452; NVPTX:       common.ret:
2453; NVPTX-NEXT:    ret void
2454; NVPTX:       user_code.entry:
2455; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2456; NVPTX-NEXT:    call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2457; NVPTX-NEXT:    call void @__kmpc_target_deinit()
2458; NVPTX-NEXT:    br label [[COMMON_RET]]
2459;
2460; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
2461; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
2462; AMDGPU-DISABLED1-NEXT:  entry:
2463; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2464; AMDGPU-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2465; AMDGPU-DISABLED1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2466; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2467; AMDGPU-DISABLED1-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2468; AMDGPU-DISABLED1-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2469; AMDGPU-DISABLED1:       is_worker_check:
2470; AMDGPU-DISABLED1-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2471; AMDGPU-DISABLED1-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2472; AMDGPU-DISABLED1-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2473; AMDGPU-DISABLED1-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2474; AMDGPU-DISABLED1-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2475; AMDGPU-DISABLED1:       worker_state_machine.begin:
2476; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2477; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2478; AMDGPU-DISABLED1-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2479; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2480; AMDGPU-DISABLED1-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2481; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2482; AMDGPU-DISABLED1:       worker_state_machine.finished:
2483; AMDGPU-DISABLED1-NEXT:    ret void
2484; AMDGPU-DISABLED1:       worker_state_machine.is_active.check:
2485; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2486; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.fallback.execute:
2487; AMDGPU-DISABLED1-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2488; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2489; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.end:
2490; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_kernel_end_parallel()
2491; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2492; AMDGPU-DISABLED1:       worker_state_machine.done.barrier:
2493; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2494; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2495; AMDGPU-DISABLED1:       thread.user_code.check:
2496; AMDGPU-DISABLED1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2497; AMDGPU-DISABLED1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2498; AMDGPU-DISABLED1:       common.ret:
2499; AMDGPU-DISABLED1-NEXT:    ret void
2500; AMDGPU-DISABLED1:       user_code.entry:
2501; AMDGPU-DISABLED1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2502; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2503; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_target_deinit()
2504; AMDGPU-DISABLED1-NEXT:    br label [[COMMON_RET]]
2505;
2506; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
2507; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
2508; AMDGPU-DISABLED2-NEXT:  entry:
2509; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2510; AMDGPU-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2511; AMDGPU-DISABLED2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2512; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2513; AMDGPU-DISABLED2-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2514; AMDGPU-DISABLED2-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2515; AMDGPU-DISABLED2:       is_worker_check:
2516; AMDGPU-DISABLED2-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2517; AMDGPU-DISABLED2-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2518; AMDGPU-DISABLED2-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2519; AMDGPU-DISABLED2-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2520; AMDGPU-DISABLED2-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2521; AMDGPU-DISABLED2:       worker_state_machine.begin:
2522; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2523; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2524; AMDGPU-DISABLED2-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2525; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2526; AMDGPU-DISABLED2-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2527; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2528; AMDGPU-DISABLED2:       worker_state_machine.finished:
2529; AMDGPU-DISABLED2-NEXT:    ret void
2530; AMDGPU-DISABLED2:       worker_state_machine.is_active.check:
2531; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2532; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.fallback.execute:
2533; AMDGPU-DISABLED2-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2534; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2535; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.end:
2536; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_kernel_end_parallel()
2537; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2538; AMDGPU-DISABLED2:       worker_state_machine.done.barrier:
2539; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2540; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2541; AMDGPU-DISABLED2:       thread.user_code.check:
2542; AMDGPU-DISABLED2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2543; AMDGPU-DISABLED2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2544; AMDGPU-DISABLED2:       common.ret:
2545; AMDGPU-DISABLED2-NEXT:    ret void
2546; AMDGPU-DISABLED2:       user_code.entry:
2547; AMDGPU-DISABLED2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2548; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2549; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
2550; AMDGPU-DISABLED2-NEXT:    br label [[COMMON_RET]]
2551;
2552; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
2553; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
2554; NVPTX-DISABLED1-NEXT:  entry:
2555; NVPTX-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2556; NVPTX-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2557; NVPTX-DISABLED1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2558; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2559; NVPTX-DISABLED1-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2560; NVPTX-DISABLED1-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2561; NVPTX-DISABLED1:       is_worker_check:
2562; NVPTX-DISABLED1-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2563; NVPTX-DISABLED1-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2564; NVPTX-DISABLED1-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2565; NVPTX-DISABLED1-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2566; NVPTX-DISABLED1-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2567; NVPTX-DISABLED1:       worker_state_machine.begin:
2568; NVPTX-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2569; NVPTX-DISABLED1-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2570; NVPTX-DISABLED1-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2571; NVPTX-DISABLED1-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2572; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2573; NVPTX-DISABLED1:       worker_state_machine.finished:
2574; NVPTX-DISABLED1-NEXT:    ret void
2575; NVPTX-DISABLED1:       worker_state_machine.is_active.check:
2576; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2577; NVPTX-DISABLED1:       worker_state_machine.parallel_region.fallback.execute:
2578; NVPTX-DISABLED1-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2579; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2580; NVPTX-DISABLED1:       worker_state_machine.parallel_region.end:
2581; NVPTX-DISABLED1-NEXT:    call void @__kmpc_kernel_end_parallel()
2582; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2583; NVPTX-DISABLED1:       worker_state_machine.done.barrier:
2584; NVPTX-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2585; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2586; NVPTX-DISABLED1:       thread.user_code.check:
2587; NVPTX-DISABLED1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2588; NVPTX-DISABLED1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2589; NVPTX-DISABLED1:       common.ret:
2590; NVPTX-DISABLED1-NEXT:    ret void
2591; NVPTX-DISABLED1:       user_code.entry:
2592; NVPTX-DISABLED1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2593; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2594; NVPTX-DISABLED1-NEXT:    call void @__kmpc_target_deinit()
2595; NVPTX-DISABLED1-NEXT:    br label [[COMMON_RET]]
2596;
2597; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
2598; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
2599; NVPTX-DISABLED2-NEXT:  entry:
2600; NVPTX-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2601; NVPTX-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2602; NVPTX-DISABLED2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2603; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2604; NVPTX-DISABLED2-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2605; NVPTX-DISABLED2-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2606; NVPTX-DISABLED2:       is_worker_check:
2607; NVPTX-DISABLED2-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2608; NVPTX-DISABLED2-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2609; NVPTX-DISABLED2-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2610; NVPTX-DISABLED2-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2611; NVPTX-DISABLED2-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2612; NVPTX-DISABLED2:       worker_state_machine.begin:
2613; NVPTX-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2614; NVPTX-DISABLED2-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2615; NVPTX-DISABLED2-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2616; NVPTX-DISABLED2-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2617; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2618; NVPTX-DISABLED2:       worker_state_machine.finished:
2619; NVPTX-DISABLED2-NEXT:    ret void
2620; NVPTX-DISABLED2:       worker_state_machine.is_active.check:
2621; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2622; NVPTX-DISABLED2:       worker_state_machine.parallel_region.fallback.execute:
2623; NVPTX-DISABLED2-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2624; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2625; NVPTX-DISABLED2:       worker_state_machine.parallel_region.end:
2626; NVPTX-DISABLED2-NEXT:    call void @__kmpc_kernel_end_parallel()
2627; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2628; NVPTX-DISABLED2:       worker_state_machine.done.barrier:
2629; NVPTX-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2630; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2631; NVPTX-DISABLED2:       thread.user_code.check:
2632; NVPTX-DISABLED2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2633; NVPTX-DISABLED2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2634; NVPTX-DISABLED2:       common.ret:
2635; NVPTX-DISABLED2-NEXT:    ret void
2636; NVPTX-DISABLED2:       user_code.entry:
2637; NVPTX-DISABLED2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2638; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2639; NVPTX-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
2640; NVPTX-DISABLED2-NEXT:    br label [[COMMON_RET]]
2641entry:
2642  %.zero.addr = alloca i32, align 4
2643  %.threadid_temp. = alloca i32, align 4
2644  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2645  %exec_user_code = icmp eq i32 %0, -1
2646  br i1 %exec_user_code, label %user_code.entry, label %common.ret
2647
2648common.ret:                                       ; preds = %entry, %user_code.entry
2649  ret void
2650
2651user_code.entry:                                  ; preds = %entry
2652  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
2653  store i32 0, ptr %.zero.addr, align 4
2654  store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
2655  call void @__omp_outlined__8(ptr %.threadid_temp., ptr %.zero.addr) #6
2656  call void @__kmpc_target_deinit()
2657  br label %common.ret
2658}
2659
2660; Function Attrs: alwaysinline convergent norecurse nounwind
2661define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
2662; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
2663; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
2664; AMDGPU-NEXT:  entry:
2665; AMDGPU-NEXT:    call void @unknown() #[[ATTR8]]
2666; AMDGPU-NEXT:    ret void
2667;
2668; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
2669; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
2670; NVPTX-NEXT:  entry:
2671; NVPTX-NEXT:    call void @unknown() #[[ATTR8]]
2672; NVPTX-NEXT:    ret void
2673;
2674; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8
2675; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
2676; AMDGPU-DISABLED1-NEXT:  entry:
2677; AMDGPU-DISABLED1-NEXT:    call void @unknown() #[[ATTR8]]
2678; AMDGPU-DISABLED1-NEXT:    ret void
2679;
2680; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8
2681; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
2682; AMDGPU-DISABLED2-NEXT:  entry:
2683; AMDGPU-DISABLED2-NEXT:    call void @unknown() #[[ATTR8]]
2684; AMDGPU-DISABLED2-NEXT:    ret void
2685;
2686; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8
2687; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
2688; NVPTX-DISABLED1-NEXT:  entry:
2689; NVPTX-DISABLED1-NEXT:    call void @unknown() #[[ATTR8]]
2690; NVPTX-DISABLED1-NEXT:    ret void
2691;
2692; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8
2693; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
2694; NVPTX-DISABLED2-NEXT:  entry:
2695; NVPTX-DISABLED2-NEXT:    call void @unknown() #[[ATTR8]]
2696; NVPTX-DISABLED2-NEXT:    ret void
2697entry:
2698  call void @unknown() #11
2699  ret void
2700}
2701
2702; Function Attrs: alwaysinline convergent norecurse nounwind
2703define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
2704; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
2705; AMDGPU-SAME: () #[[ATTR0]] {
2706; AMDGPU-NEXT:  entry:
2707; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2708; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2709; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
2710; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2711; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2712; AMDGPU:       is_worker_check:
2713; AMDGPU-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2714; AMDGPU-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2715; AMDGPU-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2716; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2717; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2718; AMDGPU:       worker_state_machine.begin:
2719; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2720; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2721; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2722; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2723; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2724; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2725; AMDGPU:       worker_state_machine.finished:
2726; AMDGPU-NEXT:    ret void
2727; AMDGPU:       worker_state_machine.is_active.check:
2728; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2729; AMDGPU:       worker_state_machine.parallel_region.check:
2730; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
2731; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2732; AMDGPU:       worker_state_machine.parallel_region.execute:
2733; AMDGPU-NEXT:    call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
2734; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2735; AMDGPU:       worker_state_machine.parallel_region.fallback.execute:
2736; AMDGPU-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2737; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2738; AMDGPU:       worker_state_machine.parallel_region.end:
2739; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
2740; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2741; AMDGPU:       worker_state_machine.done.barrier:
2742; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2743; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2744; AMDGPU:       thread.user_code.check:
2745; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2746; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2747; AMDGPU:       common.ret:
2748; AMDGPU-NEXT:    ret void
2749; AMDGPU:       user_code.entry:
2750; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2751; AMDGPU-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
2752; AMDGPU-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
2753; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2754; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
2755; AMDGPU-NEXT:    br label [[COMMON_RET]]
2756;
2757; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
2758; NVPTX-SAME: () #[[ATTR0]] {
2759; NVPTX-NEXT:  entry:
2760; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2761; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2762; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
2763; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2764; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2765; NVPTX:       is_worker_check:
2766; NVPTX-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2767; NVPTX-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2768; NVPTX-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2769; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2770; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2771; NVPTX:       worker_state_machine.begin:
2772; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2773; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2774; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2775; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2776; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2777; NVPTX:       worker_state_machine.finished:
2778; NVPTX-NEXT:    ret void
2779; NVPTX:       worker_state_machine.is_active.check:
2780; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2781; NVPTX:       worker_state_machine.parallel_region.check:
2782; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
2783; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2784; NVPTX:       worker_state_machine.parallel_region.execute:
2785; NVPTX-NEXT:    call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
2786; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2787; NVPTX:       worker_state_machine.parallel_region.fallback.execute:
2788; NVPTX-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2789; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2790; NVPTX:       worker_state_machine.parallel_region.end:
2791; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
2792; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2793; NVPTX:       worker_state_machine.done.barrier:
2794; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2795; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2796; NVPTX:       thread.user_code.check:
2797; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2798; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2799; NVPTX:       common.ret:
2800; NVPTX-NEXT:    ret void
2801; NVPTX:       user_code.entry:
2802; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2803; NVPTX-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
2804; NVPTX-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
2805; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2806; NVPTX-NEXT:    call void @__kmpc_target_deinit()
2807; NVPTX-NEXT:    br label [[COMMON_RET]]
2808;
2809; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
2810; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
2811; AMDGPU-DISABLED1-NEXT:  entry:
2812; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2813; AMDGPU-DISABLED1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2814; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
2815; AMDGPU-DISABLED1-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2816; AMDGPU-DISABLED1-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2817; AMDGPU-DISABLED1:       is_worker_check:
2818; AMDGPU-DISABLED1-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2819; AMDGPU-DISABLED1-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2820; AMDGPU-DISABLED1-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2821; AMDGPU-DISABLED1-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2822; AMDGPU-DISABLED1-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2823; AMDGPU-DISABLED1:       worker_state_machine.begin:
2824; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2825; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2826; AMDGPU-DISABLED1-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2827; AMDGPU-DISABLED1-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2828; AMDGPU-DISABLED1-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2829; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2830; AMDGPU-DISABLED1:       worker_state_machine.finished:
2831; AMDGPU-DISABLED1-NEXT:    ret void
2832; AMDGPU-DISABLED1:       worker_state_machine.is_active.check:
2833; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2834; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.check:
2835; AMDGPU-DISABLED1-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
2836; AMDGPU-DISABLED1-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2837; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.execute:
2838; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
2839; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2840; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.fallback.execute:
2841; AMDGPU-DISABLED1-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2842; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2843; AMDGPU-DISABLED1:       worker_state_machine.parallel_region.end:
2844; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_kernel_end_parallel()
2845; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2846; AMDGPU-DISABLED1:       worker_state_machine.done.barrier:
2847; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2848; AMDGPU-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2849; AMDGPU-DISABLED1:       thread.user_code.check:
2850; AMDGPU-DISABLED1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2851; AMDGPU-DISABLED1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2852; AMDGPU-DISABLED1:       common.ret:
2853; AMDGPU-DISABLED1-NEXT:    ret void
2854; AMDGPU-DISABLED1:       user_code.entry:
2855; AMDGPU-DISABLED1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2856; AMDGPU-DISABLED1-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
2857; AMDGPU-DISABLED1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
2858; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2859; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_target_deinit()
2860; AMDGPU-DISABLED1-NEXT:    br label [[COMMON_RET]]
2861;
2862; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
2863; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
2864; AMDGPU-DISABLED2-NEXT:  entry:
2865; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2866; AMDGPU-DISABLED2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2867; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
2868; AMDGPU-DISABLED2-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2869; AMDGPU-DISABLED2-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2870; AMDGPU-DISABLED2:       is_worker_check:
2871; AMDGPU-DISABLED2-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2872; AMDGPU-DISABLED2-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2873; AMDGPU-DISABLED2-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2874; AMDGPU-DISABLED2-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2875; AMDGPU-DISABLED2-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2876; AMDGPU-DISABLED2:       worker_state_machine.begin:
2877; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2878; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2879; AMDGPU-DISABLED2-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2880; AMDGPU-DISABLED2-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2881; AMDGPU-DISABLED2-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2882; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2883; AMDGPU-DISABLED2:       worker_state_machine.finished:
2884; AMDGPU-DISABLED2-NEXT:    ret void
2885; AMDGPU-DISABLED2:       worker_state_machine.is_active.check:
2886; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2887; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.check:
2888; AMDGPU-DISABLED2-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
2889; AMDGPU-DISABLED2-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2890; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.execute:
2891; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
2892; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2893; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.fallback.execute:
2894; AMDGPU-DISABLED2-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2895; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2896; AMDGPU-DISABLED2:       worker_state_machine.parallel_region.end:
2897; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_kernel_end_parallel()
2898; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2899; AMDGPU-DISABLED2:       worker_state_machine.done.barrier:
2900; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2901; AMDGPU-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2902; AMDGPU-DISABLED2:       thread.user_code.check:
2903; AMDGPU-DISABLED2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2904; AMDGPU-DISABLED2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2905; AMDGPU-DISABLED2:       common.ret:
2906; AMDGPU-DISABLED2-NEXT:    ret void
2907; AMDGPU-DISABLED2:       user_code.entry:
2908; AMDGPU-DISABLED2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2909; AMDGPU-DISABLED2-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
2910; AMDGPU-DISABLED2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
2911; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2912; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
2913; AMDGPU-DISABLED2-NEXT:    br label [[COMMON_RET]]
2914;
2915; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
2916; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
2917; NVPTX-DISABLED1-NEXT:  entry:
2918; NVPTX-DISABLED1-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2919; NVPTX-DISABLED1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2920; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
2921; NVPTX-DISABLED1-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2922; NVPTX-DISABLED1-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2923; NVPTX-DISABLED1:       is_worker_check:
2924; NVPTX-DISABLED1-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2925; NVPTX-DISABLED1-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2926; NVPTX-DISABLED1-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2927; NVPTX-DISABLED1-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2928; NVPTX-DISABLED1-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2929; NVPTX-DISABLED1:       worker_state_machine.begin:
2930; NVPTX-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2931; NVPTX-DISABLED1-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2932; NVPTX-DISABLED1-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2933; NVPTX-DISABLED1-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2934; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2935; NVPTX-DISABLED1:       worker_state_machine.finished:
2936; NVPTX-DISABLED1-NEXT:    ret void
2937; NVPTX-DISABLED1:       worker_state_machine.is_active.check:
2938; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2939; NVPTX-DISABLED1:       worker_state_machine.parallel_region.check:
2940; NVPTX-DISABLED1-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
2941; NVPTX-DISABLED1-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2942; NVPTX-DISABLED1:       worker_state_machine.parallel_region.execute:
2943; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
2944; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2945; NVPTX-DISABLED1:       worker_state_machine.parallel_region.fallback.execute:
2946; NVPTX-DISABLED1-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2947; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2948; NVPTX-DISABLED1:       worker_state_machine.parallel_region.end:
2949; NVPTX-DISABLED1-NEXT:    call void @__kmpc_kernel_end_parallel()
2950; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2951; NVPTX-DISABLED1:       worker_state_machine.done.barrier:
2952; NVPTX-DISABLED1-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2953; NVPTX-DISABLED1-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
2954; NVPTX-DISABLED1:       thread.user_code.check:
2955; NVPTX-DISABLED1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2956; NVPTX-DISABLED1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2957; NVPTX-DISABLED1:       common.ret:
2958; NVPTX-DISABLED1-NEXT:    ret void
2959; NVPTX-DISABLED1:       user_code.entry:
2960; NVPTX-DISABLED1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2961; NVPTX-DISABLED1-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
2962; NVPTX-DISABLED1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
2963; NVPTX-DISABLED1-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2964; NVPTX-DISABLED1-NEXT:    call void @__kmpc_target_deinit()
2965; NVPTX-DISABLED1-NEXT:    br label [[COMMON_RET]]
2966;
2967; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
2968; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
2969; NVPTX-DISABLED2-NEXT:  entry:
2970; NVPTX-DISABLED2-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2971; NVPTX-DISABLED2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2972; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
2973; NVPTX-DISABLED2-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2974; NVPTX-DISABLED2-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2975; NVPTX-DISABLED2:       is_worker_check:
2976; NVPTX-DISABLED2-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2977; NVPTX-DISABLED2-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2978; NVPTX-DISABLED2-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2979; NVPTX-DISABLED2-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2980; NVPTX-DISABLED2-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2981; NVPTX-DISABLED2:       worker_state_machine.begin:
2982; NVPTX-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2983; NVPTX-DISABLED2-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2984; NVPTX-DISABLED2-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2985; NVPTX-DISABLED2-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2986; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2987; NVPTX-DISABLED2:       worker_state_machine.finished:
2988; NVPTX-DISABLED2-NEXT:    ret void
2989; NVPTX-DISABLED2:       worker_state_machine.is_active.check:
2990; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2991; NVPTX-DISABLED2:       worker_state_machine.parallel_region.check:
2992; NVPTX-DISABLED2-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
2993; NVPTX-DISABLED2-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2994; NVPTX-DISABLED2:       worker_state_machine.parallel_region.execute:
2995; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
2996; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2997; NVPTX-DISABLED2:       worker_state_machine.parallel_region.fallback.execute:
2998; NVPTX-DISABLED2-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2999; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
3000; NVPTX-DISABLED2:       worker_state_machine.parallel_region.end:
3001; NVPTX-DISABLED2-NEXT:    call void @__kmpc_kernel_end_parallel()
3002; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3003; NVPTX-DISABLED2:       worker_state_machine.done.barrier:
3004; NVPTX-DISABLED2-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3005; NVPTX-DISABLED2-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
3006; NVPTX-DISABLED2:       thread.user_code.check:
3007; NVPTX-DISABLED2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3008; NVPTX-DISABLED2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3009; NVPTX-DISABLED2:       common.ret:
3010; NVPTX-DISABLED2-NEXT:    ret void
3011; NVPTX-DISABLED2:       user_code.entry:
3012; NVPTX-DISABLED2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3013; NVPTX-DISABLED2-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
3014; NVPTX-DISABLED2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
3015; NVPTX-DISABLED2-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3016; NVPTX-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
3017; NVPTX-DISABLED2-NEXT:    br label [[COMMON_RET]]
3018entry:
3019  %captured_vars_addrs = alloca [0 x ptr], align 8
3020  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
3021  %exec_user_code = icmp eq i32 %0, -1
3022  br i1 %exec_user_code, label %user_code.entry, label %common.ret
3023
3024common.ret:                                       ; preds = %entry, %user_code.entry
3025  ret void
3026
3027user_code.entry:                                  ; preds = %entry
3028  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
3029  %2 = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %1, i32 1, i64 40, i64 0, ptr @"_omp_task_entry$")
3030  %3 = call i32 @__kmpc_omp_task(ptr @1, i32 %1, ptr %2)
3031  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr %captured_vars_addrs, i64 0)
3032  call void @__kmpc_target_deinit()
3033  br label %common.ret
3034}
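; As its name says, the kernel above is not SPMDized: the task launched through
; @__kmpc_omp_task_alloc/@__kmpc_omp_task is presumably not provably
; SPMD-amenable, so the generic-mode worker state machine seen in the check
; blocks is kept.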
3035
3036; Function Attrs: alwaysinline convergent nounwind
3037define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ptr noalias %.privates., ptr noalias %.copy_fn., ptr %.task_t., ptr noalias %__context) #9 {
3038; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined.
3039; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
3040; AMDGPU-NEXT:  entry:
3041; AMDGPU-NEXT:    call void @spmd_amenable() #[[ATTR7]]
3042; AMDGPU-NEXT:    ret void
3043;
3044; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined.
3045; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
3046; NVPTX-NEXT:  entry:
3047; NVPTX-NEXT:    call void @spmd_amenable() #[[ATTR7]]
3048; NVPTX-NEXT:    ret void
3049;
3050; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined.
3051; AMDGPU-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
3052; AMDGPU-DISABLED1-NEXT:  entry:
3053; AMDGPU-DISABLED1-NEXT:    call void @spmd_amenable() #[[ATTR7]]
3054; AMDGPU-DISABLED1-NEXT:    ret void
3055;
3056; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined.
3057; AMDGPU-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
3058; AMDGPU-DISABLED2-NEXT:  entry:
3059; AMDGPU-DISABLED2-NEXT:    call void @spmd_amenable() #[[ATTR7]]
3060; AMDGPU-DISABLED2-NEXT:    ret void
3061;
3062; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined.
3063; NVPTX-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
3064; NVPTX-DISABLED1-NEXT:  entry:
3065; NVPTX-DISABLED1-NEXT:    call void @spmd_amenable() #[[ATTR7]]
3066; NVPTX-DISABLED1-NEXT:    ret void
3067;
3068; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined.
3069; NVPTX-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
3070; NVPTX-DISABLED2-NEXT:  entry:
3071; NVPTX-DISABLED2-NEXT:    call void @spmd_amenable() #[[ATTR7]]
3072; NVPTX-DISABLED2-NEXT:    ret void
3073entry:
3074  call void @spmd_amenable() #10
3075  ret void
3076}
3077
3078; Function Attrs: convergent norecurse nounwind
3079define internal i32 @"_omp_task_entry$"(i32 %0, ptr noalias %1) #3 {
3080entry:
3081  %2 = getelementptr inbounds %struct.kmp_task_t, ptr %1, i32 0, i32 2
3082  %3 = load ptr, ptr %1, align 8, !tbaa !30
3083  call void @.omp_outlined.(i32 %0, ptr %2, ptr null, ptr null, ptr %1, ptr %3) #6
3084  ret i32 0
3085}
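; The task entry thunk above forwards %2 (a pointer to field 2 of the
; kmp_task_t, the part id) and %3 (the pointer loaded from its first field,
; passed as the __context argument), together with the task descriptor %1, to
; the outlined task body @.omp_outlined. defined earlier.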
3086
3087; Function Attrs: nounwind
3088declare ptr @__kmpc_omp_task_alloc(ptr, i32, i32, i64, i64, ptr) #6
3089
3090; Function Attrs: nounwind
3091declare i32 @__kmpc_omp_task(ptr, i32, ptr) #6
3092
3093; Function Attrs: nosync nounwind
3094declare void @__kmpc_free_shared(ptr nocapture, i64) #8
3095
3096; Function Attrs: nofree nosync nounwind
3097declare ptr @__kmpc_alloc_shared(i64) #7
3098
3099; Function Attrs: convergent
3100declare void @use(ptr nocapture) #5
3101
3102; Function Attrs: convergent
3103declare void @unknown() #2
3104declare void @unknowni32p(ptr) #2
3105
3106; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
3107declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
3108
3109; Make this a weak definition so that the custom state machine rewriting is still applied, but the body cannot be used for reasoning.
3110define weak i32 @__kmpc_target_init(ptr, ptr) {
3111; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
3112; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3113; AMDGPU-NEXT:    ret i32 0
3114;
3115; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
3116; NVPTX-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3117; NVPTX-NEXT:    ret i32 0
3118;
3119; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init
3120; AMDGPU-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3121; AMDGPU-DISABLED1-NEXT:    ret i32 0
3122;
3123; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
3124; AMDGPU-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3125; AMDGPU-DISABLED2-NEXT:    ret i32 0
3126;
3127; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init
3128; NVPTX-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3129; NVPTX-DISABLED1-NEXT:    ret i32 0
3130;
3131; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
3132; NVPTX-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3133; NVPTX-DISABLED2-NEXT:    ret i32 0
3134  ret i32 0
3135}
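; The worker_state_machine.* blocks in the kernel checks above come from this
; custom state machine rewriting; the comparison against
; @__omp_outlined__9_wrapper.ID in worker_state_machine.parallel_region.check
; shows how parallel regions known at compile time are dispatched directly,
; with an indirect call as the fallback.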
3136
3137declare void @__kmpc_get_shared_variables(ptr)
3138
3139; Function Attrs: alwaysinline
3140declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #4
3141
3142; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
3143declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
3144
3145; Function Attrs: convergent
3146declare void @spmd_amenable() #5
3147
3148; Function Attrs: nounwind
3149declare i32 @__kmpc_global_thread_num(ptr) #6
3150
3151declare void @__kmpc_target_deinit()
3152
3154; Function Attrs: alwaysinline convergent norecurse nounwind
3155define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
3156; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
3157; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3158; AMDGPU-NEXT:  entry:
3159; AMDGPU-NEXT:    call void @unknown() #[[ATTR8]]
3160; AMDGPU-NEXT:    ret void
3161;
3162; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
3163; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3164; NVPTX-NEXT:  entry:
3165; NVPTX-NEXT:    call void @unknown() #[[ATTR8]]
3166; NVPTX-NEXT:    ret void
3167;
3168; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9
3169; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3170; AMDGPU-DISABLED1-NEXT:  entry:
3171; AMDGPU-DISABLED1-NEXT:    call void @unknown() #[[ATTR8]]
3172; AMDGPU-DISABLED1-NEXT:    ret void
3173;
3174; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9
3175; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3176; AMDGPU-DISABLED2-NEXT:  entry:
3177; AMDGPU-DISABLED2-NEXT:    call void @unknown() #[[ATTR8]]
3178; AMDGPU-DISABLED2-NEXT:    ret void
3179;
3180; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9
3181; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3182; NVPTX-DISABLED1-NEXT:  entry:
3183; NVPTX-DISABLED1-NEXT:    call void @unknown() #[[ATTR8]]
3184; NVPTX-DISABLED1-NEXT:    ret void
3185;
3186; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9
3187; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3188; NVPTX-DISABLED2-NEXT:  entry:
3189; NVPTX-DISABLED2-NEXT:    call void @unknown() #[[ATTR8]]
3190; NVPTX-DISABLED2-NEXT:    ret void
3191;
3192entry:
3193  call void @unknown() #11
3194  ret void
3195}
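; @__omp_outlined__9 above is the outlined parallel region of the
; do_not_spmdize_task kernel; the worker state machine dispatches to it through
; @__omp_outlined__9_wrapper when the announced work function matches
; @__omp_outlined__9_wrapper.ID and falls back to an indirect call otherwise.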
3196
3197; Function Attrs: convergent norecurse nounwind
3198define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
3199; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
3200; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3201; AMDGPU-NEXT:  entry:
3202; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3203; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3204; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3205; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3206; AMDGPU-NEXT:    call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3207; AMDGPU-NEXT:    ret void
3208;
3209; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
3210; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3211; NVPTX-NEXT:  entry:
3212; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3213; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3214; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3215; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3216; NVPTX-NEXT:    call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3217; NVPTX-NEXT:    ret void
3218;
3219; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
3220; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3221; AMDGPU-DISABLED1-NEXT:  entry:
3222; AMDGPU-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3223; AMDGPU-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3224; AMDGPU-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3225; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3226; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3227; AMDGPU-DISABLED1-NEXT:    ret void
3228;
3229; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
3230; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3231; AMDGPU-DISABLED2-NEXT:  entry:
3232; AMDGPU-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3233; AMDGPU-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3234; AMDGPU-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3235; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3236; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3237; AMDGPU-DISABLED2-NEXT:    ret void
3238;
3239; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
3240; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3241; NVPTX-DISABLED1-NEXT:  entry:
3242; NVPTX-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3243; NVPTX-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3244; NVPTX-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3245; NVPTX-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3246; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3247; NVPTX-DISABLED1-NEXT:    ret void
3248;
3249; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
3250; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3251; NVPTX-DISABLED2-NEXT:  entry:
3252; NVPTX-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
3253; NVPTX-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3254; NVPTX-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3255; NVPTX-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3256; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3257; NVPTX-DISABLED2-NEXT:    ret void
3258entry:
3259  %.addr1 = alloca i32, align 4
3260  %.zero.addr = alloca i32, align 4
3261  %global_args = alloca ptr, align 8
3262  store i32 %1, ptr %.addr1, align 4, !tbaa !18
3263  store i32 0, ptr %.zero.addr, align 4
3264  call void @__kmpc_get_shared_variables(ptr %global_args)
3265  call void @__omp_outlined__9(ptr %.addr1, ptr %.zero.addr) #6
3266  ret void
3267}
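; In the wrapper above, the worker thread id %1 is stored to %.addr1 and a zero
; bound tid to %.zero.addr, @__kmpc_get_shared_variables fills %global_args
; (this parallel region captures no variables), and @__omp_outlined__9 is
; called with the two addresses; the checks above show the dead stores
; optimized away.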
3268
3269declare fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
3270
3271attributes #0 = { alwaysinline convergent norecurse nounwind "kernel" }
3272attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
3273attributes #2 = { convergent }
3274attributes #3 = { convergent norecurse nounwind }
3275attributes #4 = { alwaysinline }
3276attributes #5 = { convergent "llvm.assume"="ompx_spmd_amenable" }
3277attributes #6 = { nounwind }
3278attributes #7 = { nofree nosync nounwind }
3279attributes #8 = { nosync nounwind }
3280attributes #9 = { alwaysinline convergent nounwind }
3281attributes #10 = { convergent "llvm.assume"="ompx_spmd_amenable" }
3282attributes #11 = { convergent }
3283
3284!omp_offload.info = !{!0, !1, !2, !3, !4, !5}
3285!llvm.module.flags = !{!12, !13, !14, !15, !16}
3286!llvm.ident = !{!17}
3287
3288!0 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3289!1 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3290!2 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3291!3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3292!4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3293!5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3294!12 = !{i32 1, !"wchar_size", i32 4}
3295!13 = !{i32 7, !"openmp", i32 50}
3296!14 = !{i32 7, !"openmp-device", i32 50}
3297!15 = !{i32 8, !"PIC Level", i32 2}
3298!16 = !{i32 7, !"frame-pointer", i32 2}
3299!17 = !{!"clang version 14.0.0"}
3300!18 = !{!19, !19, i64 0}
3301!19 = !{!"int", !20, i64 0}
3302!20 = !{!"omnipotent char", !21, i64 0}
3303!21 = !{!"Simple C/C++ TBAA"}
3304!22 = distinct !{!22, !23, !24}
3305!23 = !{!"llvm.loop.mustprogress"}
3306!24 = !{!"llvm.loop.unroll.disable"}
3307!25 = distinct !{!25, !23, !24}
3308!26 = !{!27, !27, i64 0}
3309!27 = !{!"any pointer", !20, i64 0}
3310!28 = distinct !{!28, !23, !24}
3311!29 = distinct !{!29, !23, !24}
3312!30 = !{!31, !27, i64 0}
3313!31 = !{!"kmp_task_t_with_privates", !32, i64 0}
3314!32 = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
3315;.
3316; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
3317; AMDGPU: attributes #[[ATTR1]] = { norecurse }
3318; AMDGPU: attributes #[[ATTR2]] = { convergent norecurse nounwind }
3319; AMDGPU: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
3320; AMDGPU: attributes #[[ATTR4]] = { nounwind }
3321; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
3322; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
3323; AMDGPU: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
3324; AMDGPU: attributes #[[ATTR8]] = { convergent }
3325; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
3326; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
3327; AMDGPU: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
3328;.
3329; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
3330; NVPTX: attributes #[[ATTR1]] = { norecurse }
3331; NVPTX: attributes #[[ATTR2]] = { convergent norecurse nounwind }
3332; NVPTX: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
3333; NVPTX: attributes #[[ATTR4]] = { nounwind }
3334; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
3335; NVPTX: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
3336; NVPTX: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
3337; NVPTX: attributes #[[ATTR8]] = { convergent }
3338; NVPTX: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
3339; NVPTX: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
3340; NVPTX: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
3341;.
3342; AMDGPU-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
3343; AMDGPU-DISABLED1: attributes #[[ATTR1]] = { norecurse }
3344; AMDGPU-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
3345; AMDGPU-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
3346; AMDGPU-DISABLED1: attributes #[[ATTR4]] = { nounwind }
3347; AMDGPU-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
3348; AMDGPU-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
3349; AMDGPU-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
3350; AMDGPU-DISABLED1: attributes #[[ATTR8]] = { convergent }
3351; AMDGPU-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
3352; AMDGPU-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
3353; AMDGPU-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
3354;.
3355; AMDGPU-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
3356; AMDGPU-DISABLED2: attributes #[[ATTR1]] = { norecurse }
3357; AMDGPU-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
3358; AMDGPU-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
3359; AMDGPU-DISABLED2: attributes #[[ATTR4]] = { nounwind }
3360; AMDGPU-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
3361; AMDGPU-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
3362; AMDGPU-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
3363; AMDGPU-DISABLED2: attributes #[[ATTR8]] = { convergent }
3364; AMDGPU-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
3365; AMDGPU-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
3366; AMDGPU-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
3367;.
3368; NVPTX-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
3369; NVPTX-DISABLED1: attributes #[[ATTR1]] = { norecurse }
3370; NVPTX-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
3371; NVPTX-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
3372; NVPTX-DISABLED1: attributes #[[ATTR4]] = { nounwind }
3373; NVPTX-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
3374; NVPTX-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
3375; NVPTX-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
3376; NVPTX-DISABLED1: attributes #[[ATTR8]] = { convergent }
3377; NVPTX-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
3378; NVPTX-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
3379; NVPTX-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
3380;.
3381; NVPTX-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
3382; NVPTX-DISABLED2: attributes #[[ATTR1]] = { norecurse }
3383; NVPTX-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
3384; NVPTX-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
3385; NVPTX-DISABLED2: attributes #[[ATTR4]] = { nounwind }
3386; NVPTX-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
3387; NVPTX-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
3388; NVPTX-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
3389; NVPTX-DISABLED2: attributes #[[ATTR8]] = { convergent }
3390; NVPTX-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
3391; NVPTX-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
3392; NVPTX-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
3393;.
3394; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3395; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3396; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3397; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3398; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3399; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3400; AMDGPU: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
3401; AMDGPU: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
3402; AMDGPU: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
3403; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
3404; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
3405; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
3406; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
3407; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
3408; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
3409; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"}
3410; AMDGPU: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
3411; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"}
3412; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"}
3413; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
3414; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
3415; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0}
3416; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
3417; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
3418;.
3419; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3420; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3421; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3422; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3423; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3424; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3425; NVPTX: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
3426; NVPTX: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
3427; NVPTX: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
3428; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
3429; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
3430; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
3431; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
3432; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
3433; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
3434; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"}
3435; NVPTX: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
3436; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"}
3437; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"}
3438; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
3439; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
3440; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0}
3441; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
3442; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
3443;.
3444; AMDGPU-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3445; AMDGPU-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3446; AMDGPU-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3447; AMDGPU-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3448; AMDGPU-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3449; AMDGPU-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3450; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
3451; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
3452; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
3453; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
3454; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
3455; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
3456; AMDGPU-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
3457; AMDGPU-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
3458; AMDGPU-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
3459; AMDGPU-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
3460; AMDGPU-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
3461; AMDGPU-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
3462; AMDGPU-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
3463; AMDGPU-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
3464; AMDGPU-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
3465; AMDGPU-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
3466; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
3467; AMDGPU-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
3468;.
3469; AMDGPU-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3470; AMDGPU-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3471; AMDGPU-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3472; AMDGPU-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3473; AMDGPU-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3474; AMDGPU-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3475; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
3476; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
3477; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
3478; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
3479; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
3480; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
3481; AMDGPU-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
3482; AMDGPU-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
3483; AMDGPU-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
3484; AMDGPU-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
3485; AMDGPU-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
3486; AMDGPU-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
3487; AMDGPU-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
3488; AMDGPU-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
3489; AMDGPU-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
3490; AMDGPU-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
3491; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
3492; AMDGPU-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
3493;.
3494; NVPTX-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3495; NVPTX-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3496; NVPTX-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3497; NVPTX-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3498; NVPTX-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3499; NVPTX-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3500; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
3501; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
3502; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
3503; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
3504; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
3505; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
3506; NVPTX-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
3507; NVPTX-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
3508; NVPTX-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
3509; NVPTX-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
3510; NVPTX-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
3511; NVPTX-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
3512; NVPTX-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
3513; NVPTX-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
3514; NVPTX-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
3515; NVPTX-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
3516; NVPTX-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
3517; NVPTX-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
3518;.
3519; NVPTX-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3520; NVPTX-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3521; NVPTX-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3522; NVPTX-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3523; NVPTX-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3524; NVPTX-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3525; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
3526; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
3527; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
3528; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
3529; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
3530; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
3531; NVPTX-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
3532; NVPTX-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
3533; NVPTX-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
3534; NVPTX-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
3535; NVPTX-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
3536; NVPTX-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
3537; NVPTX-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
3538; NVPTX-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
3539; NVPTX-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
3540; NVPTX-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
3541; NVPTX-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
3542; NVPTX-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
3543;.
3544