Lines Matching +full:nvptx +full:- +full:-

1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2 ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
3 ; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
4 ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED1
5 ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED2
6 ; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED1
7 ; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX-DISABLED2
109 ; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
110 ; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
117 ; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
122 ; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
123 ; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
124 ; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
125 ; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
126 ; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
127 ; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
128 ; NVPTX: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
129 ; NVPTX: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
130 ; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
131 ; NVPTX: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
132 ; NVPTX: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4
133 ; NVPTX: @__omp_outlined__9_wrapper.ID = private constant i8 undef
135 ; AMDGPU-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
136 ; AMDGPU-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
137 ; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
138 ; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
139 ; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
140 ; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
141 ; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
142 ; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
143 ; AMDGPU-DISABLED1: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
144 ; AMDGPU-DISABLED1: @x_shared.1 = internal addrspace(3) global [4 x i8] poison, align 4
145 ; AMDGPU-DISABLED1: @__omp_outlined__1_wrapper.ID = private constant i8 undef
146 ; AMDGPU-DISABLED1: @__omp_outlined__3_wrapper.ID = private constant i8 undef
147 ; AMDGPU-DISABLED1: @__omp_outlined__5_wrapper.ID = private constant i8 undef
148 ; AMDGPU-DISABLED1: @__omp_outlined__7_wrapper.ID = private constant i8 undef
149 ; AMDGPU-DISABLED1: @__omp_outlined__9_wrapper.ID = private constant i8 undef
151 ; AMDGPU-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
152 ; AMDGPU-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
153 ; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
154 ; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
155 ; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
156 ; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
157 ; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
158 ; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
159 ; AMDGPU-DISABLED2: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
160 ; AMDGPU-DISABLED2: @x_shared.1 = internal addrspace(3) global [4 x i8] poison, align 4
161 ; AMDGPU-DISABLED2: @__omp_outlined__1_wrapper.ID = private constant i8 undef
162 ; AMDGPU-DISABLED2: @__omp_outlined__3_wrapper.ID = private constant i8 undef
163 ; AMDGPU-DISABLED2: @__omp_outlined__5_wrapper.ID = private constant i8 undef
164 ; AMDGPU-DISABLED2: @__omp_outlined__7_wrapper.ID = private constant i8 undef
165 ; AMDGPU-DISABLED2: @__omp_outlined__9_wrapper.ID = private constant i8 undef
167 ; NVPTX-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
168 ; NVPTX-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
169 ; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
170 ; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
171 ; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
172 ; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
173 ; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
174 ; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
175 ; NVPTX-DISABLED1: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
176 ; NVPTX-DISABLED1: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4
177 ; NVPTX-DISABLED1: @__omp_outlined__1_wrapper.ID = private constant i8 undef
178 ; NVPTX-DISABLED1: @__omp_outlined__3_wrapper.ID = private constant i8 undef
179 ; NVPTX-DISABLED1: @__omp_outlined__5_wrapper.ID = private constant i8 undef
180 ; NVPTX-DISABLED1: @__omp_outlined__7_wrapper.ID = private constant i8 undef
181 ; NVPTX-DISABLED1: @__omp_outlined__9_wrapper.ID = private constant i8 undef
183 ; NVPTX-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
184 ; NVPTX-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
185 ; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
186 ; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
187 ; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
188 ; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
189 ; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
190 ; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
191 ; NVPTX-DISABLED2: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
192 ; NVPTX-DISABLED2: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4
193 ; NVPTX-DISABLED2: @__omp_outlined__1_wrapper.ID = private constant i8 undef
194 ; NVPTX-DISABLED2: @__omp_outlined__3_wrapper.ID = private constant i8 undef
195 ; NVPTX-DISABLED2: @__omp_outlined__5_wrapper.ID = private constant i8 undef
196 ; NVPTX-DISABLED2: @__omp_outlined__7_wrapper.ID = private constant i8 undef
197 ; NVPTX-DISABLED2: @__omp_outlined__9_wrapper.ID = private constant i8 undef
200 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
201 ; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] {
202 ; AMDGPU-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
203 ; AMDGPU-NEXT: ret void
205 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
206 ; NVPTX-SAME: () #[[ATTR0:[0-9]+]] {
207 ; NVPTX-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
208 ; NVPTX-NEXT: ret void
210 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
211 ; AMDGPU-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] {
212 ; AMDGPU-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
213 ; AMDGPU-DISABLED1-NEXT: ret void
215 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
216 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
217 ; AMDGPU-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
218 ; AMDGPU-DISABLED2-NEXT: ret void
220 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
221 ; NVPTX-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] {
222 ; NVPTX-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
223 ; NVPTX-DISABLED1-NEXT: ret void
225 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
226 ; NVPTX-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
227 ; NVPTX-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
228 ; NVPTX-DISABLED2-NEXT: ret void
234 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
235 ; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
236 ; AMDGPU-NEXT: entry:
237 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
238 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
239 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
240 ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
241 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
243 ; AMDGPU-NEXT: ret void
245 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
246 ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
247 ; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
248 ; AMDGPU-NEXT: call void @__kmpc_target_deinit()
249 ; AMDGPU-NEXT: br label [[COMMON_RET]]
251 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
252 ; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
253 ; NVPTX-NEXT: entry:
254 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
255 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
256 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
257 ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
258 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
259 ; NVPTX: common.ret:
260 ; NVPTX-NEXT: ret void
261 ; NVPTX: user_code.entry:
262 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
263 ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
264 ; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
265 ; NVPTX-NEXT: call void @__kmpc_target_deinit()
266 ; NVPTX-NEXT: br label [[COMMON_RET]]
268 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
269 ; AMDGPU-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
270 ; AMDGPU-DISABLED1-NEXT: entry:
271 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
272 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
273 ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
274 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
275 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
276 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
277 ; AMDGPU-DISABLED1: is_worker_check:
278 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
279 ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
280 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
281 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
282 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
283 ; AMDGPU-DISABLED1: worker_state_machine.begin:
284 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
285 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
286 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
287 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
288 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
289 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
290 ; AMDGPU-DISABLED1: worker_state_machine.finished:
291 ; AMDGPU-DISABLED1-NEXT: ret void
292 ; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
293 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
294 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
295 ; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
296 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
297 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
298 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
299 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
300 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
301 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
302 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
303 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
304 ; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
305 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
306 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
307 ; AMDGPU-DISABLED1: thread.user_code.check:
308 ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
309 ; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
310 ; AMDGPU-DISABLED1: common.ret:
311 ; AMDGPU-DISABLED1-NEXT: ret void
312 ; AMDGPU-DISABLED1: user_code.entry:
313 ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
314 ; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
315 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
316 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
317 ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
319 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
320 ; AMDGPU-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
321 ; AMDGPU-DISABLED2-NEXT: entry:
322 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
323 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
324 ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
325 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
326 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
327 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
328 ; AMDGPU-DISABLED2: is_worker_check:
329 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
330 ; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
331 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
332 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
333 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
334 ; AMDGPU-DISABLED2: worker_state_machine.begin:
335 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
336 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
337 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
338 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
339 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
340 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
341 ; AMDGPU-DISABLED2: worker_state_machine.finished:
342 ; AMDGPU-DISABLED2-NEXT: ret void
343 ; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
344 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
345 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
346 ; AMDGPU-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
347 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
348 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
349 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
350 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check1:
351 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
352 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
353 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
354 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
355 ; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
356 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
357 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
358 ; AMDGPU-DISABLED2: thread.user_code.check:
359 ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
360 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
361 ; AMDGPU-DISABLED2: common.ret:
362 ; AMDGPU-DISABLED2-NEXT: ret void
363 ; AMDGPU-DISABLED2: user_code.entry:
364 ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
365 ; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
366 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
367 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
368 ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
370 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
371 ; NVPTX-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
372 ; NVPTX-DISABLED1-NEXT: entry:
373 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
374 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
375 ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
376 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
377 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
378 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
379 ; NVPTX-DISABLED1: is_worker_check:
380 ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
381 ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
382 ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
383 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
384 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
385 ; NVPTX-DISABLED1: worker_state_machine.begin:
386 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
387 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
388 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
389 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
390 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
391 ; NVPTX-DISABLED1: worker_state_machine.finished:
392 ; NVPTX-DISABLED1-NEXT: ret void
393 ; NVPTX-DISABLED1: worker_state_machine.is_active.check:
394 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
395 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
396 ; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
397 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
398 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
399 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
400 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
401 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
402 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
403 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
404 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
405 ; NVPTX-DISABLED1: worker_state_machine.done.barrier:
406 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
407 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
408 ; NVPTX-DISABLED1: thread.user_code.check:
409 ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
410 ; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
411 ; NVPTX-DISABLED1: common.ret:
412 ; NVPTX-DISABLED1-NEXT: ret void
413 ; NVPTX-DISABLED1: user_code.entry:
414 ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
415 ; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
416 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
417 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
418 ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
420 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
421 ; NVPTX-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
422 ; NVPTX-DISABLED2-NEXT: entry:
423 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
424 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
425 ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
426 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
427 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
428 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
429 ; NVPTX-DISABLED2: is_worker_check:
430 ; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
431 ; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
432 ; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
433 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
434 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
435 ; NVPTX-DISABLED2: worker_state_machine.begin:
436 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
437 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
438 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
439 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
440 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
441 ; NVPTX-DISABLED2: worker_state_machine.finished:
442 ; NVPTX-DISABLED2-NEXT: ret void
443 ; NVPTX-DISABLED2: worker_state_machine.is_active.check:
444 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
445 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
446 ; NVPTX-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
447 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
448 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
449 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
450 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check1:
451 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
452 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
453 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
454 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
455 ; NVPTX-DISABLED2: worker_state_machine.done.barrier:
456 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
457 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
458 ; NVPTX-DISABLED2: thread.user_code.check:
459 ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
460 ; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
461 ; NVPTX-DISABLED2: common.ret:
462 ; NVPTX-DISABLED2-NEXT: ret void
463 ; NVPTX-DISABLED2: user_code.entry:
464 ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
465 ; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
466 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
467 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
468 ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
473 %exec_user_code = icmp eq i32 %0, -1
490 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
491 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
492 ; AMDGPU-NEXT: entry:
493 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
494 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
496 ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
497 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
498 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
500 ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
501 ; AMDGPU-NEXT: ret void
503 ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
504 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
505 ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
506 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
508 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
509 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
510 ; NVPTX-NEXT: entry:
511 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
512 ; NVPTX-NEXT: br label [[FOR_COND:%.*]]
513 ; NVPTX: for.cond:
514 ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
515 ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
516 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
517 ; NVPTX: for.cond.cleanup:
518 ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
519 ; NVPTX-NEXT: ret void
520 ; NVPTX: for.body:
521 ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
522 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
523 ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
524 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
526 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
527 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
528 ; AMDGPU-DISABLED1-NEXT: entry:
529 ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
530 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
531 ; AMDGPU-DISABLED1: for.cond:
532 ; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
533 ; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
534 ; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
535 ; AMDGPU-DISABLED1: for.cond.cleanup:
536 ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
537 ; AMDGPU-DISABLED1-NEXT: ret void
538 ; AMDGPU-DISABLED1: for.body:
539 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
540 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
541 ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
542 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
544 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
545 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
546 ; AMDGPU-DISABLED2-NEXT: entry:
547 ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
548 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
549 ; AMDGPU-DISABLED2: for.cond:
550 ; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
551 ; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
552 ; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
553 ; AMDGPU-DISABLED2: for.cond.cleanup:
554 ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
555 ; AMDGPU-DISABLED2-NEXT: ret void
556 ; AMDGPU-DISABLED2: for.body:
557 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
558 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
559 ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
560 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
562 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
563 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
564 ; NVPTX-DISABLED1-NEXT: entry:
565 ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
566 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
567 ; NVPTX-DISABLED1: for.cond:
568 ; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
569 ; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
570 ; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
571 ; NVPTX-DISABLED1: for.cond.cleanup:
572 ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
573 ; NVPTX-DISABLED1-NEXT: ret void
574 ; NVPTX-DISABLED1: for.body:
575 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
576 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
577 ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
578 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
580 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
581 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
582 ; NVPTX-DISABLED2-NEXT: entry:
583 ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
584 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
585 ; NVPTX-DISABLED2: for.cond:
586 ; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
587 ; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
588 ; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
589 ; NVPTX-DISABLED2: for.cond.cleanup:
590 ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
591 ; NVPTX-DISABLED2-NEXT: ret void
592 ; NVPTX-DISABLED2: for.body:
593 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
594 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
595 ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
596 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
612 call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs, i64 0)
619 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
620 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
621 ; AMDGPU-NEXT: entry:
622 ; AMDGPU-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
623 ; AMDGPU-NEXT: ret void
625 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
626 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
627 ; NVPTX-NEXT: entry:
628 ; NVPTX-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
629 ; NVPTX-NEXT: ret void
631 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1
632 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
633 ; AMDGPU-DISABLED1-NEXT: entry:
634 ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
635 ; AMDGPU-DISABLED1-NEXT: ret void
637 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1
638 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
639 ; AMDGPU-DISABLED2-NEXT: entry:
640 ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
641 ; AMDGPU-DISABLED2-NEXT: ret void
643 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1
644 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
645 ; NVPTX-DISABLED1-NEXT: entry:
646 ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
647 ; NVPTX-DISABLED1-NEXT: ret void
649 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1
650 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
651 ; NVPTX-DISABLED2-NEXT: entry:
652 ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
653 ; NVPTX-DISABLED2-NEXT: ret void
661 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
662 ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
663 ; AMDGPU-NEXT: entry:
664 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
665 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
666 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
667 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
668 ; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
669 ; AMDGPU-NEXT: ret void
671 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
672 ; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
673 ; NVPTX-NEXT: entry:
674 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
675 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
676 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
677 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
678 ; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
679 ; NVPTX-NEXT: ret void
681 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
682 ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
683 ; AMDGPU-DISABLED1-NEXT: entry:
684 ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
685 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
686 ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
687 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
688 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
689 ; AMDGPU-DISABLED1-NEXT: ret void
691 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
692 ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
693 ; AMDGPU-DISABLED2-NEXT: entry:
694 ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
695 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
696 ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
697 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
698 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
699 ; AMDGPU-DISABLED2-NEXT: ret void
701 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
702 ; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
703 ; NVPTX-DISABLED1-NEXT: entry:
704 ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
705 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
706 ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
707 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
708 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
709 ; NVPTX-DISABLED1-NEXT: ret void
711 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
712 ; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
713 ; NVPTX-DISABLED2-NEXT: entry:
714 ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
715 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
716 ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
717 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
718 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
719 ; NVPTX-DISABLED2-NEXT: ret void
733 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
734 ; AMDGPU-SAME: () #[[ATTR0]] {
735 ; AMDGPU-NEXT: entry:
736 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
737 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
738 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
739 ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
740 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
742 ; AMDGPU-NEXT: ret void
744 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
745 ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
746 ; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
747 ; AMDGPU-NEXT: call void @__kmpc_target_deinit()
748 ; AMDGPU-NEXT: br label [[COMMON_RET]]
750 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
751 ; NVPTX-SAME: () #[[ATTR0]] {
752 ; NVPTX-NEXT: entry:
753 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
754 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
755 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
756 ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
757 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
758 ; NVPTX: common.ret:
759 ; NVPTX-NEXT: ret void
760 ; NVPTX: user_code.entry:
761 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
762 ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
763 ; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
764 ; NVPTX-NEXT: call void @__kmpc_target_deinit()
765 ; NVPTX-NEXT: br label [[COMMON_RET]]
767 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
768 ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
769 ; AMDGPU-DISABLED1-NEXT: entry:
770 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
771 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
772 ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
773 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
774 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
775 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
776 ; AMDGPU-DISABLED1: is_worker_check:
777 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
778 ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
779 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
780 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
781 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
782 ; AMDGPU-DISABLED1: worker_state_machine.begin:
783 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
784 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
785 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
786 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
787 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
788 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
789 ; AMDGPU-DISABLED1: worker_state_machine.finished:
790 ; AMDGPU-DISABLED1-NEXT: ret void
791 ; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
792 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
793 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
794 ; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
795 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
796 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
797 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
798 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
799 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
800 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
801 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
802 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
803 ; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
804 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
805 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
806 ; AMDGPU-DISABLED1: thread.user_code.check:
807 ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
808 ; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
809 ; AMDGPU-DISABLED1: common.ret:
810 ; AMDGPU-DISABLED1-NEXT: ret void
811 ; AMDGPU-DISABLED1: user_code.entry:
812 ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
813 ; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
814 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
815 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
816 ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
818 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
819 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
820 ; AMDGPU-DISABLED2-NEXT: entry:
821 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
822 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
823 ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
824 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
825 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
826 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
827 ; AMDGPU-DISABLED2: is_worker_check:
828 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
829 ; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
830 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
831 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
832 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
833 ; AMDGPU-DISABLED2: worker_state_machine.begin:
834 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
835 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
836 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
837 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
838 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
839 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
840 ; AMDGPU-DISABLED2: worker_state_machine.finished:
841 ; AMDGPU-DISABLED2-NEXT: ret void
842 ; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
843 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
844 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
845 ; AMDGPU-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
846 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
847 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
848 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
849 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check1:
850 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
851 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
852 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
853 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
854 ; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
855 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
856 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
857 ; AMDGPU-DISABLED2: thread.user_code.check:
858 ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
859 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
860 ; AMDGPU-DISABLED2: common.ret:
861 ; AMDGPU-DISABLED2-NEXT: ret void
862 ; AMDGPU-DISABLED2: user_code.entry:
863 ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
864 ; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
865 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
866 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
867 ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
869 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
870 ; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
871 ; NVPTX-DISABLED1-NEXT: entry:
872 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
873 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
874 ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
875 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
876 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
877 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
878 ; NVPTX-DISABLED1: is_worker_check:
879 ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
880 ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
881 ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
882 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
883 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
884 ; NVPTX-DISABLED1: worker_state_machine.begin:
885 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
886 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
887 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
888 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
889 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
890 ; NVPTX-DISABLED1: worker_state_machine.finished:
891 ; NVPTX-DISABLED1-NEXT: ret void
892 ; NVPTX-DISABLED1: worker_state_machine.is_active.check:
893 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
894 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
895 ; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
896 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
897 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
898 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
899 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
900 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
901 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
902 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
903 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
904 ; NVPTX-DISABLED1: worker_state_machine.done.barrier:
905 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
906 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
907 ; NVPTX-DISABLED1: thread.user_code.check:
908 ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
909 ; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
910 ; NVPTX-DISABLED1: common.ret:
911 ; NVPTX-DISABLED1-NEXT: ret void
912 ; NVPTX-DISABLED1: user_code.entry:
913 ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
914 ; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
915 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
916 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
917 ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
919 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
920 ; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
921 ; NVPTX-DISABLED2-NEXT: entry:
922 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
923 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
924 ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
925 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
926 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
927 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
928 ; NVPTX-DISABLED2: is_worker_check:
929 ; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
930 ; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
931 ; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
932 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
933 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
934 ; NVPTX-DISABLED2: worker_state_machine.begin:
935 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
936 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
937 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
938 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
939 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
940 ; NVPTX-DISABLED2: worker_state_machine.finished:
941 ; NVPTX-DISABLED2-NEXT: ret void
942 ; NVPTX-DISABLED2: worker_state_machine.is_active.check:
943 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
944 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
945 ; NVPTX-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
946 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
947 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
948 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
949 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check1:
950 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
951 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
952 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
953 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
954 ; NVPTX-DISABLED2: worker_state_machine.done.barrier:
955 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
956 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
957 ; NVPTX-DISABLED2: thread.user_code.check:
958 ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
959 ; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
960 ; NVPTX-DISABLED2: common.ret:
961 ; NVPTX-DISABLED2-NEXT: ret void
962 ; NVPTX-DISABLED2: user_code.entry:
963 ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
964 ; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
965 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
966 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
967 ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
972 %exec_user_code = icmp eq i32 %0, -1
989 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
990 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
991 ; AMDGPU-NEXT: entry:
992 ; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
993 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
994 ; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
995 ; AMDGPU-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
996 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
998 ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
999 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1000 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1002 ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
1003 ; AMDGPU-NEXT: ret void
1005 ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1006 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1007 ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1008 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1010 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
1011 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1012 ; NVPTX-NEXT: entry:
1013 ; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
1014 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1015 ; NVPTX-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
1016 ; NVPTX-NEXT: br label [[FOR_COND:%.*]]
1017 ; NVPTX: for.cond:
1018 ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1019 ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1020 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1021 ; NVPTX: for.cond.cleanup:
1022 ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
1023 ; NVPTX-NEXT: ret void
1024 ; NVPTX: for.body:
1025 ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1026 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1027 ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1028 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1030 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
1031 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1032 ; AMDGPU-DISABLED1-NEXT: entry:
1033 ; AMDGPU-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
1034 ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1035 ; AMDGPU-DISABLED1-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
1036 ; AMDGPU-DISABLED1-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
1037 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
1038 ; AMDGPU-DISABLED1: for.cond:
1039 ; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1040 ; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1041 ; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1042 ; AMDGPU-DISABLED1: for.cond.cleanup:
1043 ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
1044 ; AMDGPU-DISABLED1-NEXT: ret void
1045 ; AMDGPU-DISABLED1: for.body:
1046 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1047 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1048 ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1049 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1051 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
1052 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1053 ; AMDGPU-DISABLED2-NEXT: entry:
1054 ; AMDGPU-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
1055 ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1056 ; AMDGPU-DISABLED2-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
1057 ; AMDGPU-DISABLED2-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
1058 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
1059 ; AMDGPU-DISABLED2: for.cond:
1060 ; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1061 ; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1062 ; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1063 ; AMDGPU-DISABLED2: for.cond.cleanup:
1064 ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
1065 ; AMDGPU-DISABLED2-NEXT: ret void
1066 ; AMDGPU-DISABLED2: for.body:
1067 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1068 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1069 ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1070 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1072 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
1073 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1074 ; NVPTX-DISABLED1-NEXT: entry:
1075 ; NVPTX-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
1076 ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1077 ; NVPTX-DISABLED1-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
1078 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
1079 ; NVPTX-DISABLED1: for.cond:
1080 ; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1081 ; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1082 ; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1083 ; NVPTX-DISABLED1: for.cond.cleanup:
1084 ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
1085 ; NVPTX-DISABLED1-NEXT: ret void
1086 ; NVPTX-DISABLED1: for.body:
1087 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1088 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1089 ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1090 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1092 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
1093 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1094 ; NVPTX-DISABLED2-NEXT: entry:
1095 ; NVPTX-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
1096 ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
1097 ; NVPTX-DISABLED2-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
1098 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
1099 ; NVPTX-DISABLED2: for.cond:
1100 ; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1101 ; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1102 ; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1103 ; NVPTX-DISABLED2: for.cond.cleanup:
1104 ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
1105 ; NVPTX-DISABLED2-NEXT: ret void
1106 ; NVPTX-DISABLED2: for.body:
1107 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1108 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
1109 ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1110 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1129 call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs, i64 0)
1135 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
1136 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1137 ; AMDGPU-NEXT: entry:
1138 ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
1139 ; AMDGPU-NEXT: ret void
1141 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
1142 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1143 ; NVPTX-NEXT: entry:
1144 ; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
1145 ; NVPTX-NEXT: ret void
1147 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3
1148 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1149 ; AMDGPU-DISABLED1-NEXT: entry:
1150 ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
1151 ; AMDGPU-DISABLED1-NEXT: ret void
1153 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3
1154 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1155 ; AMDGPU-DISABLED2-NEXT: entry:
1156 ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
1157 ; AMDGPU-DISABLED2-NEXT: ret void
1159 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3
1160 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1161 ; NVPTX-DISABLED1-NEXT: entry:
1162 ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
1163 ; NVPTX-DISABLED1-NEXT: ret void
1165 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3
1166 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
1167 ; NVPTX-DISABLED2-NEXT: entry:
1168 ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
1169 ; NVPTX-DISABLED2-NEXT: ret void
1177 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1178 ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1179 ; AMDGPU-NEXT: entry:
1180 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1181 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1182 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1183 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1184 ; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1185 ; AMDGPU-NEXT: ret void
1187 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1188 ; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1189 ; NVPTX-NEXT: entry:
1190 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1191 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1192 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1193 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1194 ; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1195 ; NVPTX-NEXT: ret void
1197 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1198 ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1199 ; AMDGPU-DISABLED1-NEXT: entry:
1200 ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1201 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1202 ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1203 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1204 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1205 ; AMDGPU-DISABLED1-NEXT: ret void
1207 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1208 ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1209 ; AMDGPU-DISABLED2-NEXT: entry:
1210 ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1211 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1212 ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1213 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1214 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1215 ; AMDGPU-DISABLED2-NEXT: ret void
1217 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1218 ; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1219 ; NVPTX-DISABLED1-NEXT: entry:
1220 ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1221 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1222 ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1223 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1224 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1225 ; NVPTX-DISABLED1-NEXT: ret void
1227 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
1228 ; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1229 ; NVPTX-DISABLED2-NEXT: entry:
1230 ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1231 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1232 ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1233 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1234 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1235 ; NVPTX-DISABLED2-NEXT: ret void
1250 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1251 ; AMDGPU-SAME: () #[[ATTR0]] {
1252 ; AMDGPU-NEXT: entry:
1253 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1254 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1255 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1256 ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1257 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1259 ; AMDGPU-NEXT: ret void
1261 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1262 ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1263 ; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1264 ; AMDGPU-NEXT: call void @__kmpc_target_deinit()
1265 ; AMDGPU-NEXT: br label [[COMMON_RET]]
1267 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1268 ; NVPTX-SAME: () #[[ATTR0]] {
1269 ; NVPTX-NEXT: entry:
1270 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1271 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1272 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1273 ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1274 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1275 ; NVPTX: common.ret:
1276 ; NVPTX-NEXT: ret void
1277 ; NVPTX: user_code.entry:
1278 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1279 ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1280 ; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1281 ; NVPTX-NEXT: call void @__kmpc_target_deinit()
1282 ; NVPTX-NEXT: br label [[COMMON_RET]]
1284 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1285 ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
1286 ; AMDGPU-DISABLED1-NEXT: entry:
1287 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1288 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1289 ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1290 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1291 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1292 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1293 ; AMDGPU-DISABLED1: is_worker_check:
1294 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1295 ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1296 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1297 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1298 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1299 ; AMDGPU-DISABLED1: worker_state_machine.begin:
1300 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1301 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1302 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1303 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1304 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1305 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1306 ; AMDGPU-DISABLED1: worker_state_machine.finished:
1307 ; AMDGPU-DISABLED1-NEXT: ret void
1308 ; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
1309 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1310 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
1311 ; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1312 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
1313 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1314 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1315 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
1316 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1317 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
1318 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
1319 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1320 ; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
1321 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1322 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1323 ; AMDGPU-DISABLED1: thread.user_code.check:
1324 ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1325 ; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1326 ; AMDGPU-DISABLED1: common.ret:
1327 ; AMDGPU-DISABLED1-NEXT: ret void
1328 ; AMDGPU-DISABLED1: user_code.entry:
1329 ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1330 ; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1331 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1332 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
1333 ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
1335 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1336 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
1337 ; AMDGPU-DISABLED2-NEXT: entry:
1338 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1339 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1340 ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1341 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1342 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1343 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1344 ; AMDGPU-DISABLED2: is_worker_check:
1345 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1346 ; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1347 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1348 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1349 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1350 ; AMDGPU-DISABLED2: worker_state_machine.begin:
1351 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1352 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1353 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1354 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1355 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1356 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1357 ; AMDGPU-DISABLED2: worker_state_machine.finished:
1358 ; AMDGPU-DISABLED2-NEXT: ret void
1359 ; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
1360 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1361 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
1362 ; AMDGPU-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1363 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
1364 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1365 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1366 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check1:
1367 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1368 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
1369 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
1370 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1371 ; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
1372 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1373 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1374 ; AMDGPU-DISABLED2: thread.user_code.check:
1375 ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1376 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1377 ; AMDGPU-DISABLED2: common.ret:
1378 ; AMDGPU-DISABLED2-NEXT: ret void
1379 ; AMDGPU-DISABLED2: user_code.entry:
1380 ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1381 ; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1382 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1383 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
1384 ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
1386 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1387 ; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
1388 ; NVPTX-DISABLED1-NEXT: entry:
1389 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1390 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1391 ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1392 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1393 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1394 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1395 ; NVPTX-DISABLED1: is_worker_check:
1396 ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1397 ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1398 ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1399 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1400 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1401 ; NVPTX-DISABLED1: worker_state_machine.begin:
1402 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1403 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1404 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1405 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1406 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1407 ; NVPTX-DISABLED1: worker_state_machine.finished:
1408 ; NVPTX-DISABLED1-NEXT: ret void
1409 ; NVPTX-DISABLED1: worker_state_machine.is_active.check:
1410 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1411 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
1412 ; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1413 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
1414 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1415 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1416 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
1417 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1418 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
1419 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
1420 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1421 ; NVPTX-DISABLED1: worker_state_machine.done.barrier:
1422 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1423 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1424 ; NVPTX-DISABLED1: thread.user_code.check:
1425 ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1426 ; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1427 ; NVPTX-DISABLED1: common.ret:
1428 ; NVPTX-DISABLED1-NEXT: ret void
1429 ; NVPTX-DISABLED1: user_code.entry:
1430 ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1431 ; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1432 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1433 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
1434 ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
1436 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
1437 ; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
1438 ; NVPTX-DISABLED2-NEXT: entry:
1439 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1440 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1441 ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1442 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
1443 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1444 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1445 ; NVPTX-DISABLED2: is_worker_check:
1446 ; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1447 ; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1448 ; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1449 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1450 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1451 ; NVPTX-DISABLED2: worker_state_machine.begin:
1452 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1453 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1454 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1455 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1456 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1457 ; NVPTX-DISABLED2: worker_state_machine.finished:
1458 ; NVPTX-DISABLED2-NEXT: ret void
1459 ; NVPTX-DISABLED2: worker_state_machine.is_active.check:
1460 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1461 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
1462 ; NVPTX-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1463 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
1464 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
1465 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1466 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check1:
1467 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1468 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
1469 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
1470 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1471 ; NVPTX-DISABLED2: worker_state_machine.done.barrier:
1472 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1473 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1474 ; NVPTX-DISABLED2: thread.user_code.check:
1475 ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1476 ; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1477 ; NVPTX-DISABLED2: common.ret:
1478 ; NVPTX-DISABLED2-NEXT: ret void
1479 ; NVPTX-DISABLED2: user_code.entry:
1480 ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1481 ; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1482 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1483 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
1484 ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
1489 %exec_user_code = icmp eq i32 %0, -1
1506 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
1507 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1508 ; AMDGPU-NEXT: entry:
1509 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
1510 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
1512 ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1513 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1514 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1516 ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
1517 ; AMDGPU-NEXT: ret void
1519 ; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
1520 ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1521 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
1522 ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1523 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
1525 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
1526 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1527 ; NVPTX-NEXT: entry:
1528 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
1529 ; NVPTX-NEXT: br label [[FOR_COND:%.*]]
1530 ; NVPTX: for.cond:
1531 ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1532 ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1533 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1534 ; NVPTX: for.cond.cleanup:
1535 ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
1536 ; NVPTX-NEXT: ret void
1537 ; NVPTX: for.body:
1538 ; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
1539 ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1540 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
1541 ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1542 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
1544 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
1545 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1546 ; AMDGPU-DISABLED1-NEXT: entry:
1547 ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
1548 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
1549 ; AMDGPU-DISABLED1: for.cond:
1550 ; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1551 ; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1552 ; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1553 ; AMDGPU-DISABLED1: for.cond.cleanup:
1554 ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
1555 ; AMDGPU-DISABLED1-NEXT: ret void
1556 ; AMDGPU-DISABLED1: for.body:
1557 ; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
1558 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1559 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
1560 ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1561 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
1563 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
1564 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1565 ; AMDGPU-DISABLED2-NEXT: entry:
1566 ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
1567 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
1568 ; AMDGPU-DISABLED2: for.cond:
1569 ; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1570 ; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1571 ; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1572 ; AMDGPU-DISABLED2: for.cond.cleanup:
1573 ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
1574 ; AMDGPU-DISABLED2-NEXT: ret void
1575 ; AMDGPU-DISABLED2: for.body:
1576 ; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
1577 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1578 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
1579 ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1580 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
1582 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
1583 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1584 ; NVPTX-DISABLED1-NEXT: entry:
1585 ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
1586 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
1587 ; NVPTX-DISABLED1: for.cond:
1588 ; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1589 ; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1590 ; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1591 ; NVPTX-DISABLED1: for.cond.cleanup:
1592 ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
1593 ; NVPTX-DISABLED1-NEXT: ret void
1594 ; NVPTX-DISABLED1: for.body:
1595 ; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
1596 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1597 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
1598 ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1599 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
1601 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
1602 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
1603 ; NVPTX-DISABLED2-NEXT: entry:
1604 ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
1605 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
1606 ; NVPTX-DISABLED2: for.cond:
1607 ; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
1608 ; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
1609 ; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
1610 ; NVPTX-DISABLED2: for.cond.cleanup:
1611 ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
1612 ; NVPTX-DISABLED2-NEXT: ret void
1613 ; NVPTX-DISABLED2: for.body:
1614 ; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
1615 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
1616 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
1617 ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
1618 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
1637 call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 1)
1644 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
1645 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
1646 ; AMDGPU-NEXT: entry:
1647 ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
1648 ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1649 ; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
1650 ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
1651 ; AMDGPU-NEXT: ret void
1653 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
1654 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
1655 ; NVPTX-NEXT: entry:
1656 ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
1657 ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1658 ; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
1659 ; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
1660 ; NVPTX-NEXT: ret void
1662 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
1663 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
1664 ; AMDGPU-DISABLED1-NEXT: entry:
1665 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
1666 ; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1667 ; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
1668 ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
1669 ; AMDGPU-DISABLED1-NEXT: ret void
1671 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
1672 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
1673 ; AMDGPU-DISABLED2-NEXT: entry:
1674 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
1675 ; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1676 ; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
1677 ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
1678 ; AMDGPU-DISABLED2-NEXT: ret void
1680 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
1681 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
1682 ; NVPTX-DISABLED1-NEXT: entry:
1683 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
1684 ; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1685 ; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
1686 ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
1687 ; NVPTX-DISABLED1-NEXT: ret void
1689 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
1690 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
1691 ; NVPTX-DISABLED2-NEXT: entry:
1692 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
1693 ; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
1694 ; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
1695 ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
1696 ; NVPTX-DISABLED2-NEXT: ret void
1707 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1708 ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1709 ; AMDGPU-NEXT: entry:
1710 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1711 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1712 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1713 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1714 ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
1715 ; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
1716 ; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
1717 ; AMDGPU-NEXT: ret void
1719 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1720 ; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1721 ; NVPTX-NEXT: entry:
1722 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1723 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1724 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1725 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1726 ; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
1727 ; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
1728 ; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
1729 ; NVPTX-NEXT: ret void
1731 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1732 ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1733 ; AMDGPU-DISABLED1-NEXT: entry:
1734 ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1735 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1736 ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1737 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1738 ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
1739 ; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
1740 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
1741 ; AMDGPU-DISABLED1-NEXT: ret void
1743 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1744 ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1745 ; AMDGPU-DISABLED2-NEXT: entry:
1746 ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1747 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1748 ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1749 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1750 ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
1751 ; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
1752 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
1753 ; AMDGPU-DISABLED2-NEXT: ret void
1755 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1756 ; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1757 ; NVPTX-DISABLED1-NEXT: entry:
1758 ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1759 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1760 ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1761 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1762 ; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
1763 ; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
1764 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
1765 ; NVPTX-DISABLED1-NEXT: ret void
1767 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
1768 ; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
1769 ; NVPTX-DISABLED2-NEXT: entry:
1770 ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
1771 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1772 ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
1773 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
1774 ; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
1775 ; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
1776 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
1777 ; NVPTX-DISABLED2-NEXT: ret void
1793 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
1794 ; AMDGPU-SAME: () #[[ATTR0]] {
1795 ; AMDGPU-NEXT: entry:
1796 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1797 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1798 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
1799 ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1800 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1802 ; AMDGPU-NEXT: ret void
1804 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1805 ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1806 ; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1807 ; AMDGPU-NEXT: call void @__kmpc_target_deinit()
1808 ; AMDGPU-NEXT: br label [[COMMON_RET]]
1810 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
1811 ; NVPTX-SAME: () #[[ATTR0]] {
1812 ; NVPTX-NEXT: entry:
1813 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1814 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1815 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
1816 ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1817 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1818 ; NVPTX: common.ret:
1819 ; NVPTX-NEXT: ret void
1820 ; NVPTX: user_code.entry:
1821 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1822 ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1823 ; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1824 ; NVPTX-NEXT: call void @__kmpc_target_deinit()
1825 ; NVPTX-NEXT: br label [[COMMON_RET]]
1827 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
1828 ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
1829 ; AMDGPU-DISABLED1-NEXT: entry:
1830 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1831 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1832 ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1833 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
1834 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1835 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1836 ; AMDGPU-DISABLED1: is_worker_check:
1837 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1838 ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1839 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1840 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1841 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1842 ; AMDGPU-DISABLED1: worker_state_machine.begin:
1843 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1844 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1845 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1846 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1847 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1848 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1849 ; AMDGPU-DISABLED1: worker_state_machine.finished:
1850 ; AMDGPU-DISABLED1-NEXT: ret void
1851 ; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
1852 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1853 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
1854 ; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1855 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
1856 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
1857 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1858 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1:
1859 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1860 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
1861 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
1862 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1863 ; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
1864 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1865 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1866 ; AMDGPU-DISABLED1: thread.user_code.check:
1867 ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1868 ; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1869 ; AMDGPU-DISABLED1: common.ret:
1870 ; AMDGPU-DISABLED1-NEXT: ret void
1871 ; AMDGPU-DISABLED1: user_code.entry:
1872 ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1873 ; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1874 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1875 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
1876 ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
1878 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
1879 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
1880 ; AMDGPU-DISABLED2-NEXT: entry:
1881 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1882 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1883 ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1884 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
1885 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1886 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1887 ; AMDGPU-DISABLED2: is_worker_check:
1888 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1889 ; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1890 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1891 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1892 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1893 ; AMDGPU-DISABLED2: worker_state_machine.begin:
1894 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1895 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
1896 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
1897 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
1898 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1899 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1900 ; AMDGPU-DISABLED2: worker_state_machine.finished:
1901 ; AMDGPU-DISABLED2-NEXT: ret void
1902 ; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
1903 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1904 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
1905 ; AMDGPU-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1906 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
1907 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
1908 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1909 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check1:
1910 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1911 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
1912 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
1913 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1914 ; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
1915 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1916 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1917 ; AMDGPU-DISABLED2: thread.user_code.check:
1918 ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1919 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1920 ; AMDGPU-DISABLED2: common.ret:
1921 ; AMDGPU-DISABLED2-NEXT: ret void
1922 ; AMDGPU-DISABLED2: user_code.entry:
1923 ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1924 ; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1925 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1926 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
1927 ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
1929 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
1930 ; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
1931 ; NVPTX-DISABLED1-NEXT: entry:
1932 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1933 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1934 ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1935 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
1936 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1937 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1938 ; NVPTX-DISABLED1: is_worker_check:
1939 ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1940 ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1941 ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1942 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1943 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1944 ; NVPTX-DISABLED1: worker_state_machine.begin:
1945 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1946 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1947 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1948 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1949 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
1950 ; NVPTX-DISABLED1: worker_state_machine.finished:
1951 ; NVPTX-DISABLED1-NEXT: ret void
1952 ; NVPTX-DISABLED1: worker_state_machine.is_active.check:
1953 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
1954 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
1955 ; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
1956 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
1957 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
1958 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
1959 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1:
1960 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
1961 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
1962 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
1963 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
1964 ; NVPTX-DISABLED1: worker_state_machine.done.barrier:
1965 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1966 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
1967 ; NVPTX-DISABLED1: thread.user_code.check:
1968 ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
1969 ; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
1970 ; NVPTX-DISABLED1: common.ret:
1971 ; NVPTX-DISABLED1-NEXT: ret void
1972 ; NVPTX-DISABLED1: user_code.entry:
1973 ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
1974 ; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
1975 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
1976 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
1977 ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
1979 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
1980 ; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
1981 ; NVPTX-DISABLED2-NEXT: entry:
1982 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
1983 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1984 ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1985 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
1986 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
1987 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
1988 ; NVPTX-DISABLED2: is_worker_check:
1989 ; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1990 ; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
1991 ; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
1992 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
1993 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
1994 ; NVPTX-DISABLED2: worker_state_machine.begin:
1995 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
1996 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
1997 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
1998 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
1999 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2000 ; NVPTX-DISABLED2: worker_state_machine.finished:
2001 ; NVPTX-DISABLED2-NEXT: ret void
2002 ; NVPTX-DISABLED2: worker_state_machine.is_active.check:
2003 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2004 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
2005 ; NVPTX-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
2006 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
2007 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
2008 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2009 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check1:
2010 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2011 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
2012 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
2013 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2014 ; NVPTX-DISABLED2: worker_state_machine.done.barrier:
2015 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2016 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2017 ; NVPTX-DISABLED2: thread.user_code.check:
2018 ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2019 ; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2020 ; NVPTX-DISABLED2: common.ret:
2021 ; NVPTX-DISABLED2-NEXT: ret void
2022 ; NVPTX-DISABLED2: user_code.entry:
2023 ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2024 ; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
2025 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2026 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
2027 ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
2032 %exec_user_code = icmp eq i32 %0, -1
2049 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
2050 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2051 ; AMDGPU-NEXT: entry:
2052 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2053 ; AMDGPU-NEXT: br label [[REGION_CHECK_TID:%.*]]
2055 ; AMDGPU-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
2056 ; AMDGPU-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
2057 ; AMDGPU-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
2059 ; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
2060 ; AMDGPU-NEXT: br label [[REGION_GUARDED_END:%.*]]
2062 ; AMDGPU-NEXT: br label [[REGION_BARRIER]]
2064 ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
2065 ; AMDGPU-NEXT: br label [[REGION_EXIT:%.*]]
2067 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
2069 ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2070 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2071 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2073 ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
2074 ; AMDGPU-NEXT: ret void
2076 ; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
2077 ; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
2078 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2079 ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2080 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2082 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
2083 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2084 ; NVPTX-NEXT: entry:
2085 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2086 ; NVPTX-NEXT: br label [[REGION_CHECK_TID:%.*]]
2087 ; NVPTX: region.check.tid:
2088 ; NVPTX-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
2089 ; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
2090 ; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
2091 ; NVPTX: region.guarded:
2092 ; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
2093 ; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]]
2094 ; NVPTX: region.guarded.end:
2095 ; NVPTX-NEXT: br label [[REGION_BARRIER]]
2096 ; NVPTX: region.barrier:
2097 ; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
2098 ; NVPTX-NEXT: br label [[REGION_EXIT:%.*]]
2099 ; NVPTX: region.exit:
2100 ; NVPTX-NEXT: br label [[FOR_COND:%.*]]
2101 ; NVPTX: for.cond:
2102 ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2103 ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2104 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2105 ; NVPTX: for.cond.cleanup:
2106 ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
2107 ; NVPTX-NEXT: ret void
2108 ; NVPTX: for.body:
2109 ; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
2110 ; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
2111 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2112 ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2113 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2115 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
2116 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2117 ; AMDGPU-DISABLED1-NEXT: entry:
2118 ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2119 ; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
2120 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
2121 ; AMDGPU-DISABLED1: for.cond:
2122 ; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2123 ; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2124 ; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2125 ; AMDGPU-DISABLED1: for.cond.cleanup:
2126 ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
2127 ; AMDGPU-DISABLED1-NEXT: ret void
2128 ; AMDGPU-DISABLED1: for.body:
2129 ; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
2130 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
2131 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2132 ; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2133 ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2135 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
2136 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2137 ; AMDGPU-DISABLED2-NEXT: entry:
2138 ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2139 ; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
2140 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
2141 ; AMDGPU-DISABLED2: for.cond:
2142 ; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2143 ; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2144 ; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2145 ; AMDGPU-DISABLED2: for.cond.cleanup:
2146 ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
2147 ; AMDGPU-DISABLED2-NEXT: ret void
2148 ; AMDGPU-DISABLED2: for.body:
2149 ; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
2150 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
2151 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2152 ; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2153 ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2155 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
2156 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2157 ; NVPTX-DISABLED1-NEXT: entry:
2158 ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2159 ; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
2160 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
2161 ; NVPTX-DISABLED1: for.cond:
2162 ; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2163 ; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2164 ; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2165 ; NVPTX-DISABLED1: for.cond.cleanup:
2166 ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
2167 ; NVPTX-DISABLED1-NEXT: ret void
2168 ; NVPTX-DISABLED1: for.body:
2169 ; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
2170 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
2171 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2172 ; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2173 ; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2175 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
2176 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
2177 ; NVPTX-DISABLED2-NEXT: entry:
2178 ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
2179 ; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
2180 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
2181 ; NVPTX-DISABLED2: for.cond:
2182 ; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
2183 ; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
2184 ; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
2185 ; NVPTX-DISABLED2: for.cond.cleanup:
2186 ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
2187 ; NVPTX-DISABLED2-NEXT: ret void
2188 ; NVPTX-DISABLED2: for.body:
2189 ; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
2190 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
2191 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
2192 ; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
2193 ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
2213 call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 1)
2220 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
2221 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2222 ; AMDGPU-NEXT: entry:
2223 ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
2224 ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2225 ; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
2226 ; AMDGPU-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2227 ; AMDGPU-NEXT: ret void
2229 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
2230 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2231 ; NVPTX-NEXT: entry:
2232 ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
2233 ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2234 ; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
2235 ; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2236 ; NVPTX-NEXT: ret void
2238 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
2239 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2240 ; AMDGPU-DISABLED1-NEXT: entry:
2241 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
2242 ; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2243 ; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
2244 ; AMDGPU-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2245 ; AMDGPU-DISABLED1-NEXT: ret void
2247 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
2248 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2249 ; AMDGPU-DISABLED2-NEXT: entry:
2250 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
2251 ; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2252 ; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
2253 ; AMDGPU-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2254 ; AMDGPU-DISABLED2-NEXT: ret void
2256 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
2257 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2258 ; NVPTX-DISABLED1-NEXT: entry:
2259 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
2260 ; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2261 ; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
2262 ; NVPTX-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2263 ; NVPTX-DISABLED1-NEXT: ret void
2265 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
2266 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
2267 ; NVPTX-DISABLED2-NEXT: entry:
2268 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
2269 ; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
2270 ; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
2271 ; NVPTX-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
2272 ; NVPTX-DISABLED2-NEXT: ret void
2283 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2284 ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2285 ; AMDGPU-NEXT: entry:
2286 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2287 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2288 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2289 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2290 ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2291 ; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
2292 ; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2293 ; AMDGPU-NEXT: ret void
2295 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2296 ; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2297 ; NVPTX-NEXT: entry:
2298 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2299 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2300 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2301 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2302 ; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2303 ; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
2304 ; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2305 ; NVPTX-NEXT: ret void
2307 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2308 ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2309 ; AMDGPU-DISABLED1-NEXT: entry:
2310 ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2311 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2312 ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2313 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2314 ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2315 ; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
2316 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2317 ; AMDGPU-DISABLED1-NEXT: ret void
2319 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2320 ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2321 ; AMDGPU-DISABLED2-NEXT: entry:
2322 ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2323 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2324 ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2325 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2326 ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2327 ; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
2328 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2329 ; AMDGPU-DISABLED2-NEXT: ret void
2331 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2332 ; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2333 ; NVPTX-DISABLED1-NEXT: entry:
2334 ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2335 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2336 ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2337 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2338 ; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2339 ; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
2340 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2341 ; NVPTX-DISABLED1-NEXT: ret void
2343 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
2344 ; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
2345 ; NVPTX-DISABLED2-NEXT: entry:
2346 ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
2347 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2348 ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
2349 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
2350 ; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
2351 ; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
2352 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
2353 ; NVPTX-DISABLED2-NEXT: ret void
2369 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
2370 ; AMDGPU-SAME: () #[[ATTR0]] {
2371 ; AMDGPU-NEXT: entry:
2372 ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2373 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2374 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2375 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2376 ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2377 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2379 ; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2380 ; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2381 ; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2382 ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2383 ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2385 ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2386 ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2387 ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2388 ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2389 ; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2390 ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2392 ; AMDGPU-NEXT: ret void
2394 ; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2396 ; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2397 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2399 ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
2400 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2402 ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2403 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2405 ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2406 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2408 ; AMDGPU-NEXT: ret void
2410 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2411 ; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2412 ; AMDGPU-NEXT: call void @__kmpc_target_deinit()
2413 ; AMDGPU-NEXT: br label [[COMMON_RET]]
2415 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
2416 ; NVPTX-SAME: () #[[ATTR0]] {
2417 ; NVPTX-NEXT: entry:
2418 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2419 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2420 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2421 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2422 ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2423 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2424 ; NVPTX: is_worker_check:
2425 ; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2426 ; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2427 ; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2428 ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2429 ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2430 ; NVPTX: worker_state_machine.begin:
2431 ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2432 ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2433 ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2434 ; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2435 ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2436 ; NVPTX: worker_state_machine.finished:
2437 ; NVPTX-NEXT: ret void
2438 ; NVPTX: worker_state_machine.is_active.check:
2439 ; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2440 ; NVPTX: worker_state_machine.parallel_region.fallback.execute:
2441 ; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2442 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2443 ; NVPTX: worker_state_machine.parallel_region.end:
2444 ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
2445 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2446 ; NVPTX: worker_state_machine.done.barrier:
2447 ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2448 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2449 ; NVPTX: thread.user_code.check:
2450 ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2451 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2452 ; NVPTX: common.ret:
2453 ; NVPTX-NEXT: ret void
2454 ; NVPTX: user_code.entry:
2455 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2456 ; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2457 ; NVPTX-NEXT: call void @__kmpc_target_deinit()
2458 ; NVPTX-NEXT: br label [[COMMON_RET]]
2460 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
2461 ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
2462 ; AMDGPU-DISABLED1-NEXT: entry:
2463 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2464 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2465 ; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2466 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2467 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2468 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2469 ; AMDGPU-DISABLED1: is_worker_check:
2470 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2471 ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2472 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2473 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2474 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2475 ; AMDGPU-DISABLED1: worker_state_machine.begin:
2476 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2477 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2478 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2479 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2480 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2481 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2482 ; AMDGPU-DISABLED1: worker_state_machine.finished:
2483 ; AMDGPU-DISABLED1-NEXT: ret void
2484 ; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
2485 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2486 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
2487 ; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2488 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2489 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
2490 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
2491 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2492 ; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
2493 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2494 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2495 ; AMDGPU-DISABLED1: thread.user_code.check:
2496 ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2497 ; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2498 ; AMDGPU-DISABLED1: common.ret:
2499 ; AMDGPU-DISABLED1-NEXT: ret void
2500 ; AMDGPU-DISABLED1: user_code.entry:
2501 ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2502 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2503 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
2504 ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
2506 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
2507 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
2508 ; AMDGPU-DISABLED2-NEXT: entry:
2509 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2510 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2511 ; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2512 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2513 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2514 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2515 ; AMDGPU-DISABLED2: is_worker_check:
2516 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2517 ; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2518 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2519 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2520 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2521 ; AMDGPU-DISABLED2: worker_state_machine.begin:
2522 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2523 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2524 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2525 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2526 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2527 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2528 ; AMDGPU-DISABLED2: worker_state_machine.finished:
2529 ; AMDGPU-DISABLED2-NEXT: ret void
2530 ; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
2531 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2532 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
2533 ; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2534 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2535 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
2536 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
2537 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2538 ; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
2539 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2540 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2541 ; AMDGPU-DISABLED2: thread.user_code.check:
2542 ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2543 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2544 ; AMDGPU-DISABLED2: common.ret:
2545 ; AMDGPU-DISABLED2-NEXT: ret void
2546 ; AMDGPU-DISABLED2: user_code.entry:
2547 ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2548 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2549 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
2550 ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
2552 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
2553 ; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
2554 ; NVPTX-DISABLED1-NEXT: entry:
2555 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2556 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2557 ; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2558 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2559 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2560 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2561 ; NVPTX-DISABLED1: is_worker_check:
2562 ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2563 ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2564 ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2565 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2566 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2567 ; NVPTX-DISABLED1: worker_state_machine.begin:
2568 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2569 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2570 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2571 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2572 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2573 ; NVPTX-DISABLED1: worker_state_machine.finished:
2574 ; NVPTX-DISABLED1-NEXT: ret void
2575 ; NVPTX-DISABLED1: worker_state_machine.is_active.check:
2576 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2577 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
2578 ; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2579 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2580 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
2581 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
2582 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2583 ; NVPTX-DISABLED1: worker_state_machine.done.barrier:
2584 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2585 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2586 ; NVPTX-DISABLED1: thread.user_code.check:
2587 ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2588 ; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2589 ; NVPTX-DISABLED1: common.ret:
2590 ; NVPTX-DISABLED1-NEXT: ret void
2591 ; NVPTX-DISABLED1: user_code.entry:
2592 ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2593 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2594 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
2595 ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
2597 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
2598 ; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
2599 ; NVPTX-DISABLED2-NEXT: entry:
2600 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2601 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
2602 ; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
2603 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
2604 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2605 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2606 ; NVPTX-DISABLED2: is_worker_check:
2607 ; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2608 ; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2609 ; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2610 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2611 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2612 ; NVPTX-DISABLED2: worker_state_machine.begin:
2613 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2614 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2615 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2616 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2617 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2618 ; NVPTX-DISABLED2: worker_state_machine.finished:
2619 ; NVPTX-DISABLED2-NEXT: ret void
2620 ; NVPTX-DISABLED2: worker_state_machine.is_active.check:
2621 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2622 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
2623 ; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2624 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2625 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
2626 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
2627 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2628 ; NVPTX-DISABLED2: worker_state_machine.done.barrier:
2629 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2630 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2631 ; NVPTX-DISABLED2: thread.user_code.check:
2632 ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2633 ; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2634 ; NVPTX-DISABLED2: common.ret:
2635 ; NVPTX-DISABLED2-NEXT: ret void
2636 ; NVPTX-DISABLED2: user_code.entry:
2637 ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2638 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
2639 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
2640 ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
2645 %exec_user_code = icmp eq i32 %0, -1
2662 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
2663 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
2664 ; AMDGPU-NEXT: entry:
2665 ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
2666 ; AMDGPU-NEXT: ret void
2668 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
2669 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
2670 ; NVPTX-NEXT: entry:
2671 ; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
2672 ; NVPTX-NEXT: ret void
2674 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8
2675 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
2676 ; AMDGPU-DISABLED1-NEXT: entry:
2677 ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
2678 ; AMDGPU-DISABLED1-NEXT: ret void
2680 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8
2681 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
2682 ; AMDGPU-DISABLED2-NEXT: entry:
2683 ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
2684 ; AMDGPU-DISABLED2-NEXT: ret void
2686 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8
2687 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
2688 ; NVPTX-DISABLED1-NEXT: entry:
2689 ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
2690 ; NVPTX-DISABLED1-NEXT: ret void
2692 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8
2693 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
2694 ; NVPTX-DISABLED2-NEXT: entry:
2695 ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
2696 ; NVPTX-DISABLED2-NEXT: ret void
2704 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
2705 ; AMDGPU-SAME: () #[[ATTR0]] {
2706 ; AMDGPU-NEXT: entry:
2707 ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2708 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2709 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
2710 ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2711 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2713 ; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2714 ; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2715 ; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2716 ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2717 ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2719 ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2720 ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2721 ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2722 ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2723 ; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2724 ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2726 ; AMDGPU-NEXT: ret void
2728 ; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2730 ; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
2731 ; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2733 ; AMDGPU-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
2734 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2736 ; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2737 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2739 ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
2740 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2742 ; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2743 ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2745 ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2746 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2748 ; AMDGPU-NEXT: ret void
2750 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2751 ; AMDGPU-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
2752 ; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
2753 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2754 ; AMDGPU-NEXT: call void @__kmpc_target_deinit()
2755 ; AMDGPU-NEXT: br label [[COMMON_RET]]
2757 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
2758 ; NVPTX-SAME: () #[[ATTR0]] {
2759 ; NVPTX-NEXT: entry:
2760 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2761 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2762 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
2763 ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2764 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2765 ; NVPTX: is_worker_check:
2766 ; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2767 ; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2768 ; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2769 ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2770 ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2771 ; NVPTX: worker_state_machine.begin:
2772 ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2773 ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2774 ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2775 ; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2776 ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2777 ; NVPTX: worker_state_machine.finished:
2778 ; NVPTX-NEXT: ret void
2779 ; NVPTX: worker_state_machine.is_active.check:
2780 ; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2781 ; NVPTX: worker_state_machine.parallel_region.check:
2782 ; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
2783 ; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2784 ; NVPTX: worker_state_machine.parallel_region.execute:
2785 ; NVPTX-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
2786 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2787 ; NVPTX: worker_state_machine.parallel_region.fallback.execute:
2788 ; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2789 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2790 ; NVPTX: worker_state_machine.parallel_region.end:
2791 ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
2792 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2793 ; NVPTX: worker_state_machine.done.barrier:
2794 ; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2795 ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2796 ; NVPTX: thread.user_code.check:
2797 ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2798 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2799 ; NVPTX: common.ret:
2800 ; NVPTX-NEXT: ret void
2801 ; NVPTX: user_code.entry:
2802 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2803 ; NVPTX-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
2804 ; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
2805 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2806 ; NVPTX-NEXT: call void @__kmpc_target_deinit()
2807 ; NVPTX-NEXT: br label [[COMMON_RET]]
2809 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
2810 ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
2811 ; AMDGPU-DISABLED1-NEXT: entry:
2812 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2813 ; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2814 ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
2815 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2816 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2817 ; AMDGPU-DISABLED1: is_worker_check:
2818 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2819 ; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2820 ; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2821 ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2822 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2823 ; AMDGPU-DISABLED1: worker_state_machine.begin:
2824 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2825 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2826 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2827 ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2828 ; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2829 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2830 ; AMDGPU-DISABLED1: worker_state_machine.finished:
2831 ; AMDGPU-DISABLED1-NEXT: ret void
2832 ; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
2833 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2834 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
2835 ; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
2836 ; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2837 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
2838 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
2839 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2840 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
2841 ; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2842 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2843 ; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
2844 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
2845 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2846 ; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
2847 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2848 ; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2849 ; AMDGPU-DISABLED1: thread.user_code.check:
2850 ; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2851 ; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2852 ; AMDGPU-DISABLED1: common.ret:
2853 ; AMDGPU-DISABLED1-NEXT: ret void
2854 ; AMDGPU-DISABLED1: user_code.entry:
2855 ; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2856 ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
2857 ; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
2858 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2859 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
2860 ; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
2862 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
2863 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
2864 ; AMDGPU-DISABLED2-NEXT: entry:
2865 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
2866 ; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2867 ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
2868 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2869 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2870 ; AMDGPU-DISABLED2: is_worker_check:
2871 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2872 ; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2873 ; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2874 ; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2875 ; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2876 ; AMDGPU-DISABLED2: worker_state_machine.begin:
2877 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2878 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
2879 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
2880 ; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
2881 ; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2882 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2883 ; AMDGPU-DISABLED2: worker_state_machine.finished:
2884 ; AMDGPU-DISABLED2-NEXT: ret void
2885 ; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
2886 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2887 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
2888 ; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
2889 ; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2890 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
2891 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
2892 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2893 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
2894 ; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2895 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2896 ; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
2897 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
2898 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2899 ; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
2900 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2901 ; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2902 ; AMDGPU-DISABLED2: thread.user_code.check:
2903 ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2904 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2905 ; AMDGPU-DISABLED2: common.ret:
2906 ; AMDGPU-DISABLED2-NEXT: ret void
2907 ; AMDGPU-DISABLED2: user_code.entry:
2908 ; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2909 ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
2910 ; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
2911 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2912 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
2913 ; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
2915 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
2916 ; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
2917 ; NVPTX-DISABLED1-NEXT: entry:
2918 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2919 ; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2920 ; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
2921 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2922 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2923 ; NVPTX-DISABLED1: is_worker_check:
2924 ; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2925 ; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2926 ; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2927 ; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2928 ; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2929 ; NVPTX-DISABLED1: worker_state_machine.begin:
2930 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2931 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2932 ; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2933 ; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2934 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2935 ; NVPTX-DISABLED1: worker_state_machine.finished:
2936 ; NVPTX-DISABLED1-NEXT: ret void
2937 ; NVPTX-DISABLED1: worker_state_machine.is_active.check:
2938 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2939 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
2940 ; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
2941 ; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2942 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
2943 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
2944 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2945 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
2946 ; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2947 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
2948 ; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
2949 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
2950 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
2951 ; NVPTX-DISABLED1: worker_state_machine.done.barrier:
2952 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2953 ; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
2954 ; NVPTX-DISABLED1: thread.user_code.check:
2955 ; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2956 ; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2957 ; NVPTX-DISABLED1: common.ret:
2958 ; NVPTX-DISABLED1-NEXT: ret void
2959 ; NVPTX-DISABLED1: user_code.entry:
2960 ; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
2961 ; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
2962 ; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
2963 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
2964 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
2965 ; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
2967 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
2968 ; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
2969 ; NVPTX-DISABLED2-NEXT: entry:
2970 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
2971 ; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
2972 ; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
2973 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
2974 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
2975 ; NVPTX-DISABLED2: is_worker_check:
2976 ; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2977 ; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
2978 ; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
2979 ; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
2980 ; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
2981 ; NVPTX-DISABLED2: worker_state_machine.begin:
2982 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
2983 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
2984 ; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
2985 ; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
2986 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
2987 ; NVPTX-DISABLED2: worker_state_machine.finished:
2988 ; NVPTX-DISABLED2-NEXT: ret void
2989 ; NVPTX-DISABLED2: worker_state_machine.is_active.check:
2990 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
2991 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
2992 ; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID
2993 ; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
2994 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
2995 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
2996 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
2997 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
2998 ; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
2999 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
3000 ; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
3001 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
3002 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
3003 ; NVPTX-DISABLED2: worker_state_machine.done.barrier:
3004 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
3005 ; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
3006 ; NVPTX-DISABLED2: thread.user_code.check:
3007 ; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
3008 ; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
3009 ; NVPTX-DISABLED2: common.ret:
3010 ; NVPTX-DISABLED2-NEXT: ret void
3011 ; NVPTX-DISABLED2: user_code.entry:
3012 ; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
3013 ; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
3014 ; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
3015 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
3016 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
3017 ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
3021 %exec_user_code = icmp eq i32 %0, -1
3031 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr %captured_vars_addrs, i64 0)
3038 ; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined.
3039 ; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
3040 ; AMDGPU-NEXT: entry:
3041 ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
3042 ; AMDGPU-NEXT: ret void
3044 ; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined.
3045 ; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
3046 ; NVPTX-NEXT: entry:
3047 ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
3048 ; NVPTX-NEXT: ret void
3050 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined.
3051 ; AMDGPU-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
3052 ; AMDGPU-DISABLED1-NEXT: entry:
3053 ; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
3054 ; AMDGPU-DISABLED1-NEXT: ret void
3056 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined.
3057 ; AMDGPU-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
3058 ; AMDGPU-DISABLED2-NEXT: entry:
3059 ; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
3060 ; AMDGPU-DISABLED2-NEXT: ret void
3062 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined.
3063 ; NVPTX-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
3064 ; NVPTX-DISABLED1-NEXT: entry:
3065 ; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
3066 ; NVPTX-DISABLED1-NEXT: ret void
3068 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined.
3069 ; NVPTX-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
3070 ; NVPTX-DISABLED2-NEXT: entry:
3071 ; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
3072 ; NVPTX-DISABLED2-NEXT: ret void
3111 ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
3112 ; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3113 ; AMDGPU-NEXT: ret i32 0
3115 ; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
3116 ; NVPTX-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3117 ; NVPTX-NEXT: ret i32 0
3119 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init
3120 ; AMDGPU-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3121 ; AMDGPU-DISABLED1-NEXT: ret i32 0
3123 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
3124 ; AMDGPU-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3125 ; AMDGPU-DISABLED2-NEXT: ret i32 0
3127 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init
3128 ; NVPTX-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3129 ; NVPTX-DISABLED1-NEXT: ret i32 0
3131 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
3132 ; NVPTX-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
3133 ; NVPTX-DISABLED2-NEXT: ret i32 0
3156 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
3157 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3158 ; AMDGPU-NEXT: entry:
3159 ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
3160 ; AMDGPU-NEXT: ret void
3162 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
3163 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3164 ; NVPTX-NEXT: entry:
3165 ; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
3166 ; NVPTX-NEXT: ret void
3168 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9
3169 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3170 ; AMDGPU-DISABLED1-NEXT: entry:
3171 ; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
3172 ; AMDGPU-DISABLED1-NEXT: ret void
3174 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9
3175 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3176 ; AMDGPU-DISABLED2-NEXT: entry:
3177 ; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
3178 ; AMDGPU-DISABLED2-NEXT: ret void
3180 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9
3181 ; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3182 ; NVPTX-DISABLED1-NEXT: entry:
3183 ; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
3184 ; NVPTX-DISABLED1-NEXT: ret void
3186 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9
3187 ; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
3188 ; NVPTX-DISABLED2-NEXT: entry:
3189 ; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
3190 ; NVPTX-DISABLED2-NEXT: ret void
3199 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
3200 ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3201 ; AMDGPU-NEXT: entry:
3202 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3203 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3204 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3205 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3206 ; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3207 ; AMDGPU-NEXT: ret void
3209 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
3210 ; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3211 ; NVPTX-NEXT: entry:
3212 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3213 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3214 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3215 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3216 ; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3217 ; NVPTX-NEXT: ret void
3219 ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
3220 ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3221 ; AMDGPU-DISABLED1-NEXT: entry:
3222 ; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3223 ; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3224 ; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3225 ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3226 ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3227 ; AMDGPU-DISABLED1-NEXT: ret void
3229 ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
3230 ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3231 ; AMDGPU-DISABLED2-NEXT: entry:
3232 ; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3233 ; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3234 ; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3235 ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3236 ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3237 ; AMDGPU-DISABLED2-NEXT: ret void
3239 ; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
3240 ; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3241 ; NVPTX-DISABLED1-NEXT: entry:
3242 ; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3243 ; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3244 ; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3245 ; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3246 ; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3247 ; NVPTX-DISABLED1-NEXT: ret void
3249 ; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
3250 ; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
3251 ; NVPTX-DISABLED2-NEXT: entry:
3252 ; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
3253 ; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
3254 ; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
3255 ; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
3256 ; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
3257 ; NVPTX-DISABLED2-NEXT: ret void
3296 !14 = !{i32 7, !"openmp-device", i32 50}
3298 !16 = !{i32 7, !"frame-pointer", i32 2}
3321 ; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
3322 ; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
3325 ; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
3326 ; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
3327 ; AMDGPU: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
3329 ; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
3330 ; NVPTX: attributes #[[ATTR1]] = { norecurse }
3331 ; NVPTX: attributes #[[ATTR2]] = { convergent norecurse nounwind }
3332 ; NVPTX: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
3333 ; NVPTX: attributes #[[ATTR4]] = { nounwind }
3334 ; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
3335 ; NVPTX: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
3336 ; NVPTX: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
3337 ; NVPTX: attributes #[[ATTR8]] = { convergent }
3338 ; NVPTX: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
3339 ; NVPTX: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
3340 ; NVPTX: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
3342 ; AMDGPU-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
3343 ; AMDGPU-DISABLED1: attributes #[[ATTR1]] = { norecurse }
3344 ; AMDGPU-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
3345 ; AMDGPU-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
3346 ; AMDGPU-DISABLED1: attributes #[[ATTR4]] = { nounwind }
3347 ; AMDGPU-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
3348 ; AMDGPU-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
3349 ; AMDGPU-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
3350 ; AMDGPU-DISABLED1: attributes #[[ATTR8]] = { convergent }
3351 ; AMDGPU-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
3352 ; AMDGPU-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
3353 ; AMDGPU-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
3355 ; AMDGPU-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
3356 ; AMDGPU-DISABLED2: attributes #[[ATTR1]] = { norecurse }
3357 ; AMDGPU-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
3358 ; AMDGPU-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
3359 ; AMDGPU-DISABLED2: attributes #[[ATTR4]] = { nounwind }
3360 ; AMDGPU-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
3361 ; AMDGPU-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
3362 ; AMDGPU-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
3363 ; AMDGPU-DISABLED2: attributes #[[ATTR8]] = { convergent }
3364 ; AMDGPU-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
3365 ; AMDGPU-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
3366 ; AMDGPU-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
3368 ; NVPTX-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
3369 ; NVPTX-DISABLED1: attributes #[[ATTR1]] = { norecurse }
3370 ; NVPTX-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
3371 ; NVPTX-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
3372 ; NVPTX-DISABLED1: attributes #[[ATTR4]] = { nounwind }
3373 ; NVPTX-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
3374 ; NVPTX-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
3375 ; NVPTX-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
3376 ; NVPTX-DISABLED1: attributes #[[ATTR8]] = { convergent }
3377 ; NVPTX-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
3378 ; NVPTX-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
3379 ; NVPTX-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
3381 ; NVPTX-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
3382 ; NVPTX-DISABLED2: attributes #[[ATTR1]] = { norecurse }
3383 ; NVPTX-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
3384 ; NVPTX-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
3385 ; NVPTX-DISABLED2: attributes #[[ATTR4]] = { nounwind }
3386 ; NVPTX-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
3387 ; NVPTX-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
3388 ; NVPTX-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
3389 ; NVPTX-DISABLED2: attributes #[[ATTR8]] = { convergent }
3390 ; NVPTX-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
3391 ; NVPTX-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
3392 ; NVPTX-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
3394 ; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3395 ; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3396 ; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3397 ; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3398 ; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3399 ; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3400 ; AMDGPU: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
3401 ; AMDGPU: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
3402 ; AMDGPU: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
3403 ; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
3404 ; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
3405 ; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
3406 ; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
3407 ; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
3408 ; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
3410 ; AMDGPU: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
3414 ; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
3419 ; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3420 ; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3421 ; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3422 ; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3423 ; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3424 ; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3425 ; NVPTX: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
3426 ; NVPTX: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
3427 ; NVPTX: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
3428 ; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
3429 ; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
3430 ; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
3431 ; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
3432 ; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
3433 ; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
3434 ; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"}
3435 ; NVPTX: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
3436 ; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"}
3437 ; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"}
3438 ; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
3439 ; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
3440 ; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0}
3441 ; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
3442 ; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
3444 ; AMDGPU-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3445 ; AMDGPU-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3446 ; AMDGPU-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3447 ; AMDGPU-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3448 ; AMDGPU-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3449 ; AMDGPU-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3450 ; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
3451 ; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
3452 ; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
3453 ; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
3454 ; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
3455 ; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
3456 ; AMDGPU-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
3457 ; AMDGPU-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
3458 ; AMDGPU-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
3459 ; AMDGPU-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
3460 ; AMDGPU-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
3461 ; AMDGPU-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
3462 ; AMDGPU-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
3463 ; AMDGPU-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
3464 ; AMDGPU-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
3465 ; AMDGPU-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
3466 ; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
3467 ; AMDGPU-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
3469 ; AMDGPU-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3470 ; AMDGPU-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3471 ; AMDGPU-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3472 ; AMDGPU-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3473 ; AMDGPU-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3474 ; AMDGPU-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3475 ; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
3476 ; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
3477 ; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
3478 ; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
3479 ; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
3480 ; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
3481 ; AMDGPU-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
3482 ; AMDGPU-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
3483 ; AMDGPU-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
3484 ; AMDGPU-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
3485 ; AMDGPU-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
3486 ; AMDGPU-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
3487 ; AMDGPU-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
3488 ; AMDGPU-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
3489 ; AMDGPU-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
3490 ; AMDGPU-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
3491 ; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
3492 ; AMDGPU-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
3494 ; NVPTX-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3495 ; NVPTX-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3496 ; NVPTX-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3497 ; NVPTX-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3498 ; NVPTX-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3499 ; NVPTX-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3500 ; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
3501 ; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
3502 ; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
3503 ; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
3504 ; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
3505 ; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
3506 ; NVPTX-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
3507 ; NVPTX-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
3508 ; NVPTX-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
3509 ; NVPTX-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
3510 ; NVPTX-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
3511 ; NVPTX-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
3512 ; NVPTX-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
3513 ; NVPTX-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
3514 ; NVPTX-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
3515 ; NVPTX-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
3516 ; NVPTX-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
3517 ; NVPTX-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
3519 ; NVPTX-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
3520 ; NVPTX-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
3521 ; NVPTX-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
3522 ; NVPTX-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
3523 ; NVPTX-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
3524 ; NVPTX-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
3525 ; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
3526 ; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
3527 ; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
3528 ; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
3529 ; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
3530 ; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
3531 ; NVPTX-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
3532 ; NVPTX-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
3533 ; NVPTX-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
3534 ; NVPTX-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
3535 ; NVPTX-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
3536 ; NVPTX-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
3537 ; NVPTX-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
3538 ; NVPTX-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
3539 ; NVPTX-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
3540 ; NVPTX-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
3541 ; NVPTX-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
3542 ; NVPTX-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}