1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs 2; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU 3; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX 4; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU-DISABLED 5; RUN: opt --mtriple=nvptx64-- -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX-DISABLED 6 7;; void p0(void); 8;; void p1(void); 9;; int unknown(void); 10;; void unknown_pure(void) __attribute__((pure)); 11;; [[omp::assume("omp_no_openmp")]] void unknown_no_openmp(void); 12;; 13;; int G; 14;; void no_parallel_region_in_here(void) { 15;; #pragma omp single 16;; G = 0; 17;; } 18;; 19;; void no_state_machine_needed() { 20;; #pragma omp target teams 21;; { 22;; no_parallel_region_in_here(); 23;; unknown_no_openmp(); 24;; } 25;; } 26;; 27;; void simple_state_machine() { 28;; #pragma omp target teams 29;; { 30;; unknown_no_openmp(); 31;; #pragma omp parallel 32;; { p0(); } 33;; no_parallel_region_in_here(); 34;; #pragma omp parallel 35;; { p1(); } 36;; } 37;; } 38;; 39;; void simple_state_machine_interprocedural_after(void); 40;; void simple_state_machine_interprocedural_before(void) { 41;; #pragma omp parallel 42;; { p0(); } 43;; } 44;; void simple_state_machine_interprocedural() { 45;; #pragma omp target teams 46;; { 47;; unknown_no_openmp(); 48;; simple_state_machine_interprocedural_before(); 49;; no_parallel_region_in_here(); 50;; #pragma omp parallel 51;; { p1(); } 52;; simple_state_machine_interprocedural_after(); 53;; } 54;; } 55;; void simple_state_machine_interprocedural_after(void) { 56;; #pragma omp parallel 57;; { p0(); } 58;; } 59;; 60;; void 
simple_state_machine_with_fallback() { 61;; #pragma omp target teams 62;; { 63;; #pragma omp parallel 64;; { p0(); } 65;; unknown(); 66;; #pragma omp parallel 67;; { p1(); } 68;; } 69;; } 70;; 71;; void simple_state_machine_no_openmp_attr() { 72;; #pragma omp target teams 73;; { 74;; #pragma omp parallel 75;; { p0(); } 76;; unknown_no_openmp(); 77;; #pragma omp parallel 78;; { p1(); } 79;; } 80;; } 81;; 82;; void simple_state_machine_pure() { 83;; #pragma omp target teams 84;; { 85;; unknown_no_openmp(); 86;; #pragma omp parallel 87;; { p0(); } 88;; unknown_pure(); 89;; #pragma omp parallel 90;; { p1(); } 91;; } 92;; } 93;; 94;; int omp_get_thread_num(); 95;; void simple_state_machine_interprocedural_nested_recursive_after(int); 96;; void simple_state_machine_interprocedural_nested_recursive_after_after(void); 97;; void simple_state_machine_interprocedural_nested_recursive() { 98;; #pragma omp target teams 99;; { 100;; simple_state_machine_interprocedural_nested_recursive_after( 101;; omp_get_thread_num()); 102;; } 103;; } 104;; 105;; void simple_state_machine_interprocedural_nested_recursive_after(int a) { 106;; if (a == 0) 107;; return; 108;; simple_state_machine_interprocedural_nested_recursive_after(a - 1); 109;; simple_state_machine_interprocedural_nested_recursive_after_after(); 110;; } 111;; void simple_state_machine_interprocedural_nested_recursive_after_after(void) { 112;; #pragma omp parallel 113;; { p0(); } 114;; } 115;; 116;; __attribute__((weak)) void weak_callee_empty(void) {} 117;; void no_state_machine_weak_callee() { 118;; #pragma omp target teams 119;; { weak_callee_empty(); } 120;; } 121 122%struct.ident_t = type { i32, i32, i32, i32, ptr } 123%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } 124%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 } 125 126@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 127@1 = private unnamed_addr constant 
%struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 128@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 129@G = external global i32, align 4 130@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8 131 132@__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 133@__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 134@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 135@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 136@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 137@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 138@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = 
local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 139@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 140 141define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 { 142entry: 143 %.zero.addr = alloca i32, align 4 144 %.threadid_temp. = alloca i32, align 4 145 store i32 0, ptr %.zero.addr, align 4 146 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr %dyn) 147 %exec_user_code = icmp eq i32 %0, -1 148 br i1 %exec_user_code, label %user_code.entry, label %worker.exit 149 150user_code.entry: ; preds = %entry 151 %1 = call i32 @__kmpc_global_thread_num(ptr @1) 152 store i32 %1, ptr %.threadid_temp., align 4 153 call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3 154 call void @__kmpc_target_deinit() 155 ret void 156 157worker.exit: ; preds = %entry 158 ret void 159} 160 161; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. 162define weak i32 @__kmpc_target_init(ptr, ptr) { 163 ret i32 0 164} 165 166define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { 167entry: 168 %.global_tid..addr = alloca ptr, align 8 169 %.bound_tid..addr = alloca ptr, align 8 170 store ptr %.global_tid., ptr %.global_tid..addr, align 8 171 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 172 call void @no_parallel_region_in_here() #7 173 call void @unknown_no_openmp() #8 174 ret void 175} 176 177define hidden void @no_parallel_region_in_here() #1 { 178entry: 179 %0 = call i32 @__kmpc_global_thread_num(ptr @2) 180 %1 = call i32 @__kmpc_single(ptr @2, i32 %0) 181 %2 = icmp ne i32 %1, 0 182 br i1 %2, label %omp_if.then, label %omp_if.end 183 184omp_if.then: ; preds = %entry 185 store i32 0, ptr @G, align 4 186 call void @__kmpc_end_single(ptr @2, i32 %0) 187 br label %omp_if.end 188 189omp_if.end: ; preds = %omp_if.then, %entry 190 call void @__kmpc_barrier(ptr @3, i32 %0) 191 ret void 192} 193 194declare void @unknown_no_openmp() #2 195 196declare i32 @__kmpc_global_thread_num(ptr) #3 197 198declare void @__kmpc_target_deinit() 199 200define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 { 201entry: 202 %.zero.addr = alloca i32, align 4 203 %.threadid_temp. = alloca i32, align 4 204 store i32 0, ptr %.zero.addr, align 4 205 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr %dyn) 206 %exec_user_code = icmp eq i32 %0, -1 207 br i1 %exec_user_code, label %user_code.entry, label %worker.exit 208 209user_code.entry: ; preds = %entry 210 %1 = call i32 @__kmpc_global_thread_num(ptr @1) 211 store i32 %1, ptr %.threadid_temp., align 4 212 call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3 213 call void @__kmpc_target_deinit() 214 ret void 215 216worker.exit: ; preds = %entry 217 ret void 218} 219 220define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { 221entry: 222 %.global_tid..addr = alloca ptr, align 8 223 %.bound_tid..addr = alloca ptr, align 8 224 %captured_vars_addrs = alloca [0 x ptr], align 8 225 %captured_vars_addrs1 = alloca [0 x ptr], align 8 226 store ptr %.global_tid., ptr %.global_tid..addr, align 8 227 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 228 call void @unknown_no_openmp() #8 229 %0 = load ptr, ptr %.global_tid..addr, align 8 230 %1 = load i32, ptr %0, align 4 231 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs, i64 0) 232 call void @no_parallel_region_in_here() #7 233 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs1, i64 0) 234 ret void 235} 236 237define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { 238entry: 239 %.global_tid..addr = alloca ptr, align 8 240 %.bound_tid..addr = alloca ptr, align 8 241 store ptr %.global_tid., ptr %.global_tid..addr, align 8 242 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 243 call void @p0() #7 244 ret void 245} 246 247declare void @p0() #4 248 249define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 { 250entry: 251 %.addr = alloca i16, align 2 252 %.addr1 = alloca i32, align 4 253 %.zero.addr = alloca i32, align 4 254 %global_args = alloca ptr, align 8 255 store i32 0, ptr %.zero.addr, align 4 256 store i16 %0, ptr %.addr, align 2 257 store i32 %1, ptr %.addr1, align 4 258 call void @__kmpc_get_shared_variables(ptr %global_args) 259 call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr) #3 260 ret void 261} 262 263declare void @__kmpc_get_shared_variables(ptr) 264 265declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) 266 267define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { 268entry: 269 %.global_tid..addr = alloca ptr, align 8 270 %.bound_tid..addr = alloca ptr, align 8 271 store ptr %.global_tid., ptr %.global_tid..addr, align 8 272 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 273 call void @p1() #7 274 ret void 275} 276 277declare void @p1() #4 278 279define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #0 { 280entry: 281 %.addr = alloca i16, align 2 282 %.addr1 = alloca i32, align 4 283 %.zero.addr = alloca i32, align 4 284 %global_args = alloca ptr, align 8 285 store i32 0, ptr %.zero.addr, align 4 286 store i16 %0, ptr %.addr, align 2 287 store i32 %1, ptr %.addr1, align 4 288 call void @__kmpc_get_shared_variables(ptr %global_args) 289 call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #3 290 ret void 291} 292 293define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 { 294entry: 295 %.zero.addr = alloca i32, align 4 296 %.threadid_temp. = alloca i32, align 4 297 store i32 0, ptr %.zero.addr, align 4 298 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr %dyn) 299 %exec_user_code = icmp eq i32 %0, -1 300 br i1 %exec_user_code, label %user_code.entry, label %worker.exit 301 302user_code.entry: ; preds = %entry 303 %1 = call i32 @__kmpc_global_thread_num(ptr @1) 304 store i32 %1, ptr %.threadid_temp., align 4 305 call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3 306 call void @__kmpc_target_deinit() 307 ret void 308 309worker.exit: ; preds = %entry 310 ret void 311} 312 313define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { 314entry: 315 %.global_tid..addr = alloca ptr, align 8 316 %.bound_tid..addr = alloca ptr, align 8 317 %captured_vars_addrs = alloca [0 x ptr], align 8 318 store ptr %.global_tid., ptr %.global_tid..addr, align 8 319 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 320 call void @unknown_no_openmp() #8 321 call void @simple_state_machine_interprocedural_before() #7 322 call void @no_parallel_region_in_here() #7 323 %0 = load ptr, ptr %.global_tid..addr, align 8 324 %1 = load i32, ptr %0, align 4 325 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 0) 326 call void @simple_state_machine_interprocedural_after() #7 327 ret void 328} 329 330define hidden void @simple_state_machine_interprocedural_before() #1 { 331entry: 332 %captured_vars_addrs = alloca [0 x ptr], align 8 333 %0 = call i32 @__kmpc_global_thread_num(ptr @2) 334 call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs, i64 0) 335 ret void 336} 337 338define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { 339entry: 340 %.global_tid..addr = alloca ptr, align 8 341 %.bound_tid..addr = alloca ptr, align 8 342 store ptr %.global_tid., ptr %.global_tid..addr, align 8 343 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 344 call void @p1() #7 345 ret void 346} 347 348define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #0 { 349entry: 350 %.addr = alloca i16, align 2 351 %.addr1 = alloca i32, align 4 352 %.zero.addr = alloca i32, align 4 353 %global_args = alloca ptr, align 8 354 store i32 0, ptr %.zero.addr, align 4 355 store i16 %0, ptr %.addr, align 2 356 store i32 %1, ptr %.addr1, align 4 357 call void @__kmpc_get_shared_variables(ptr %global_args) 358 call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr) #3 359 ret void 360} 361 362define hidden void @simple_state_machine_interprocedural_after() #1 { 363entry: 364 %captured_vars_addrs = alloca [0 x ptr], align 8 365 %0 = call i32 @__kmpc_global_thread_num(ptr @2) 366 call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs, i64 0) 367 ret void 368} 369 370define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 { 371entry: 372 %.zero.addr = alloca i32, align 4 373 %.threadid_temp. 
= alloca i32, align 4 374 store i32 0, ptr %.zero.addr, align 4 375 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr %dyn) 376 %exec_user_code = icmp eq i32 %0, -1 377 br i1 %exec_user_code, label %user_code.entry, label %worker.exit 378 379user_code.entry: ; preds = %entry 380 %1 = call i32 @__kmpc_global_thread_num(ptr @1) 381 store i32 %1, ptr %.threadid_temp., align 4 382 call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3 383 call void @__kmpc_target_deinit() 384 ret void 385 386worker.exit: ; preds = %entry 387 ret void 388} 389 390define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { 391entry: 392 %.global_tid..addr = alloca ptr, align 8 393 %.bound_tid..addr = alloca ptr, align 8 394 %captured_vars_addrs = alloca [0 x ptr], align 8 395 %captured_vars_addrs1 = alloca [0 x ptr], align 8 396 store ptr %.global_tid., ptr %.global_tid..addr, align 8 397 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 398 %0 = load ptr, ptr %.global_tid..addr, align 8 399 %1 = load i32, ptr %0, align 4 400 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 0) 401 %call = call i32 @unknown() #7 402 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr %captured_vars_addrs1, i64 0) 403 ret void 404} 405 406define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { 407entry: 408 %.global_tid..addr = alloca ptr, align 8 409 %.bound_tid..addr = alloca ptr, align 8 410 store ptr %.global_tid., ptr %.global_tid..addr, align 8 411 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 412 call void @p0() #7 413 ret void 414} 415 416define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #0 { 417entry: 418 %.addr = alloca i16, align 2 419 %.addr1 = alloca i32, align 4 420 %.zero.addr = alloca i32, align 4 421 %global_args = alloca ptr, align 8 422 store i32 0, ptr %.zero.addr, align 4 423 store i16 %0, ptr %.addr, align 2 424 store i32 %1, ptr %.addr1, align 4 425 call void @__kmpc_get_shared_variables(ptr %global_args) 426 call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr) #3 427 ret void 428} 429 430declare i32 @unknown() #4 431 432define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { 433entry: 434 %.global_tid..addr = alloca ptr, align 8 435 %.bound_tid..addr = alloca ptr, align 8 436 store ptr %.global_tid., ptr %.global_tid..addr, align 8 437 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 438 call void @p1() #7 439 ret void 440} 441 442define internal void @__omp_outlined__8_wrapper(i16 zeroext %0, i32 %1) #0 { 443entry: 444 %.addr = alloca i16, align 2 445 %.addr1 = alloca i32, align 4 446 %.zero.addr = alloca i32, align 4 447 %global_args = alloca ptr, align 8 448 store i32 0, ptr %.zero.addr, align 4 449 store i16 %0, ptr %.addr, align 2 450 store i32 %1, ptr %.addr1, align 4 451 call void @__kmpc_get_shared_variables(ptr %global_args) 452 call void @__omp_outlined__8(ptr %.addr1, ptr %.zero.addr) #3 453 ret void 454} 455 456define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 { 457entry: 458 %.zero.addr = alloca i32, align 4 459 %.threadid_temp. 
= alloca i32, align 4 460 store i32 0, ptr %.zero.addr, align 4 461 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr %dyn) 462 %exec_user_code = icmp eq i32 %0, -1 463 br i1 %exec_user_code, label %user_code.entry, label %worker.exit 464 465user_code.entry: ; preds = %entry 466 %1 = call i32 @__kmpc_global_thread_num(ptr @1) 467 store i32 %1, ptr %.threadid_temp., align 4 468 call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3 469 call void @__kmpc_target_deinit() 470 ret void 471 472worker.exit: ; preds = %entry 473 ret void 474} 475 476define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { 477entry: 478 %.global_tid..addr = alloca ptr, align 8 479 %.bound_tid..addr = alloca ptr, align 8 480 %captured_vars_addrs = alloca [0 x ptr], align 8 481 %captured_vars_addrs1 = alloca [0 x ptr], align 8 482 store ptr %.global_tid., ptr %.global_tid..addr, align 8 483 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 484 %0 = load ptr, ptr %.global_tid..addr, align 8 485 %1 = load i32, ptr %0, align 4 486 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr %captured_vars_addrs, i64 0) 487 call void @unknown_no_openmp() #8 488 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr %captured_vars_addrs1, i64 0) 489 ret void 490} 491 492define internal void @__omp_outlined__10(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { 493entry: 494 %.global_tid..addr = alloca ptr, align 8 495 %.bound_tid..addr = alloca ptr, align 8 496 store ptr %.global_tid., ptr %.global_tid..addr, align 8 497 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 498 call void @p0() #7 499 ret void 500} 501 502define internal void @__omp_outlined__10_wrapper(i16 zeroext %0, i32 %1) #0 { 503entry: 504 %.addr = alloca i16, align 2 505 %.addr1 = alloca i32, align 4 506 %.zero.addr = alloca i32, align 4 507 %global_args = alloca ptr, align 8 508 store i32 0, ptr %.zero.addr, align 4 509 store i16 %0, ptr %.addr, align 2 510 store i32 %1, ptr %.addr1, align 4 511 call void @__kmpc_get_shared_variables(ptr %global_args) 512 call void @__omp_outlined__10(ptr %.addr1, ptr %.zero.addr) #3 513 ret void 514} 515 516define internal void @__omp_outlined__11(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { 517entry: 518 %.global_tid..addr = alloca ptr, align 8 519 %.bound_tid..addr = alloca ptr, align 8 520 store ptr %.global_tid., ptr %.global_tid..addr, align 8 521 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 522 call void @p1() #7 523 ret void 524} 525 526define internal void @__omp_outlined__11_wrapper(i16 zeroext %0, i32 %1) #0 { 527entry: 528 %.addr = alloca i16, align 2 529 %.addr1 = alloca i32, align 4 530 %.zero.addr = alloca i32, align 4 531 %global_args = alloca ptr, align 8 532 store i32 0, ptr %.zero.addr, align 4 533 store i16 %0, ptr %.addr, align 2 534 store i32 %1, ptr %.addr1, align 4 535 call void @__kmpc_get_shared_variables(ptr %global_args) 536 call void @__omp_outlined__11(ptr %.addr1, ptr %.zero.addr) #3 537 ret void 538} 539 540define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 { 541entry: 542 %.zero.addr = alloca i32, align 4 543 %.threadid_temp. 
= alloca i32, align 4 544 store i32 0, ptr %.zero.addr, align 4 545 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr %dyn) 546 %exec_user_code = icmp eq i32 %0, -1 547 br i1 %exec_user_code, label %user_code.entry, label %worker.exit 548 549user_code.entry: ; preds = %entry 550 %1 = call i32 @__kmpc_global_thread_num(ptr @1) 551 store i32 %1, ptr %.threadid_temp., align 4 552 call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3 553 call void @__kmpc_target_deinit() 554 ret void 555 556worker.exit: ; preds = %entry 557 ret void 558} 559 560define internal void @__omp_outlined__12(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { 561entry: 562 %.global_tid..addr = alloca ptr, align 8 563 %.bound_tid..addr = alloca ptr, align 8 564 %captured_vars_addrs = alloca [0 x ptr], align 8 565 %captured_vars_addrs1 = alloca [0 x ptr], align 8 566 store ptr %.global_tid., ptr %.global_tid..addr, align 8 567 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 568 call void @unknown_no_openmp() #8 569 %0 = load ptr, ptr %.global_tid..addr, align 8 570 %1 = load i32, ptr %0, align 4 571 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr %captured_vars_addrs, i64 0) 572 call void @unknown_pure() #9 573 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr %captured_vars_addrs1, i64 0) 574 ret void 575} 576 577define internal void @__omp_outlined__13(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { 578entry: 579 %.global_tid..addr = alloca ptr, align 8 580 %.bound_tid..addr = alloca ptr, align 8 581 store ptr %.global_tid., ptr %.global_tid..addr, align 8 582 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 583 call void @p0() #7 584 ret void 585} 586 587define internal void @__omp_outlined__13_wrapper(i16 zeroext %0, i32 %1) #0 { 588entry: 589 %.addr = alloca i16, align 2 590 %.addr1 = alloca i32, align 4 591 %.zero.addr = alloca i32, align 4 592 %global_args = alloca ptr, align 8 593 store i32 0, ptr %.zero.addr, align 4 594 store i16 %0, ptr %.addr, align 2 595 store i32 %1, ptr %.addr1, align 4 596 call void @__kmpc_get_shared_variables(ptr %global_args) 597 call void @__omp_outlined__13(ptr %.addr1, ptr %.zero.addr) #3 598 ret void 599} 600 601declare void @unknown_pure() #5 602 603define internal void @__omp_outlined__14(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { 604entry: 605 %.global_tid..addr = alloca ptr, align 8 606 %.bound_tid..addr = alloca ptr, align 8 607 store ptr %.global_tid., ptr %.global_tid..addr, align 8 608 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 609 call void @p1() #7 610 ret void 611} 612 613define internal void @__omp_outlined__14_wrapper(i16 zeroext %0, i32 %1) #0 { 614entry: 615 %.addr = alloca i16, align 2 616 %.addr1 = alloca i32, align 4 617 %.zero.addr = alloca i32, align 4 618 %global_args = alloca ptr, align 8 619 store i32 0, ptr %.zero.addr, align 4 620 store i16 %0, ptr %.addr, align 2 621 store i32 %1, ptr %.addr1, align 4 622 call void @__kmpc_get_shared_variables(ptr %global_args) 623 call void @__omp_outlined__14(ptr %.addr1, ptr %.zero.addr) #3 624 ret void 625} 626 627define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 { 628entry: 629 %.zero.addr = alloca i32, align 4 630 %.threadid_temp. 
= alloca i32, align 4
  store i32 0, ptr %.zero.addr, align 4
  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr %dyn)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %worker.exit

user_code.entry:                                  ; preds = %entry
  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
  store i32 %1, ptr %.threadid_temp., align 4
  call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3
  call void @__kmpc_target_deinit()
  ret void

worker.exit:                                      ; preds = %entry
  ret void
}

; Team-master payload of the l92 kernel: queries the thread id and hands it to
; the recursive helper below (matches the C source's
; simple_state_machine_interprocedural_nested_recursive region in HEAD).
define internal void @__omp_outlined__15(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
  %.global_tid..addr = alloca ptr, align 8
  %.bound_tid..addr = alloca ptr, align 8
  store ptr %.global_tid., ptr %.global_tid..addr, align 8
  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
  %call = call i32 @omp_get_thread_num() #7
  call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #7
  ret void
}

; Recursive helper: returns immediately when %a == 0, otherwise recurses with
; %a - 1 and then calls ..._after_after() (which spawns a parallel region).
; The recursion is what forces the generic state machine on this kernel.
define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 {
entry:
  %a.addr = alloca i32, align 4
  store i32 %a, ptr %a.addr, align 4
  %0 = load i32, ptr %a.addr, align 4
  %cmp = icmp eq i32 %0, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  br label %return

if.end:                                           ; preds = %entry
  %1 = load i32, ptr %a.addr, align 4
  %sub = sub nsw i32 %1, 1
  call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #7
  call void @simple_state_machine_interprocedural_nested_recursive_after_after() #7
  br label %return

return:                                           ; preds = %if.end, %if.then
  ret void
}

declare i32 @omp_get_thread_num(...) #4

; Kernel for no_state_machine_weak_callee (source line 112 in HEAD): the only
; callee is the weak, empty @weak_callee_empty, so no state machine is needed.
define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 {
entry:
  %.zero.addr = alloca i32, align 4
  %.threadid_temp. = alloca i32, align 4
  store i32 0, ptr %.zero.addr, align 4
  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr %dyn)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %worker.exit

user_code.entry:                                  ; preds = %entry
  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
  store i32 %1, ptr %.threadid_temp., align 4
  call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3
  call void @__kmpc_target_deinit()
  ret void

worker.exit:                                      ; preds = %entry
  ret void
}

; Team-master payload of the l112 kernel: just calls the weak empty callee.
define internal void @__omp_outlined__16(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
  %.global_tid..addr = alloca ptr, align 8
  %.bound_tid..addr = alloca ptr, align 8
  store ptr %.global_tid., ptr %.global_tid..addr, align 8
  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
  call void @weak_callee_empty() #7
  ret void
}

; Weak so it could be overridden at link time; the optimizer must be
; conservative about its body even though this definition is empty.
define weak hidden void @weak_callee_empty() #1 {
entry:
  ret void
}

declare i32 @__kmpc_single(ptr, i32) #6

declare void @__kmpc_end_single(ptr, i32) #6

declare void @__kmpc_barrier(ptr, i32) #6

; Parallel-region body outlined from simple_state_machine_interprocedural_before
; (see HEAD source): calls @p0().
define internal void @__omp_outlined__17(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
  %.global_tid..addr = alloca ptr, align 8
  %.bound_tid..addr = alloca ptr, align 8
  store ptr %.global_tid., ptr %.global_tid..addr, align 8
  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
  call void @p0() #7
  ret void
}

; Worker-side wrapper: loads shared variables and forwards to __omp_outlined__17.
define internal void @__omp_outlined__17_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
  %.addr = alloca i16, align 2
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca ptr, align 8
  store i32 0, ptr %.zero.addr, align 4
  store i16 %0, ptr %.addr, align 2
  store i32 %1, ptr %.addr1, align 4
  call void @__kmpc_get_shared_variables(ptr %global_args)
  call void @__omp_outlined__17(ptr %.addr1, ptr %.zero.addr) #3
  ret void
}

; Parallel-region body calling @p0() (outlined from
; simple_state_machine_interprocedural_after in the HEAD source).
define internal void @__omp_outlined__18(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
  %.global_tid..addr = alloca ptr, align 8
  %.bound_tid..addr = alloca ptr, align 8
  store ptr %.global_tid., ptr %.global_tid..addr, align 8
  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
  call void @p0() #7
  ret void
}

; Worker-side wrapper: loads shared variables and forwards to __omp_outlined__18.
define internal void @__omp_outlined__18_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
  %.addr = alloca i16, align 2
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca ptr, align 8
  store i32 0, ptr %.zero.addr, align 4
  store i16 %0, ptr %.addr, align 2
  store i32 %1, ptr %.addr1, align 4
  call void @__kmpc_get_shared_variables(ptr %global_args)
  call void @__omp_outlined__18(ptr %.addr1, ptr %.zero.addr) #3
  ret void
}

; Leaf of the recursive chain: launches a parallel region (__omp_outlined__19 /
; its wrapper) through __kmpc_parallel_51 with no captured variables.
define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 {
entry:
  %captured_vars_addrs = alloca [0 x ptr], align 8
  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs, i64 0)
  ret void
}

; Parallel-region body calling @p0().
define internal void @__omp_outlined__19(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
  %.global_tid..addr = alloca ptr, align 8
  %.bound_tid..addr = alloca ptr, align 8
  store ptr %.global_tid., ptr %.global_tid..addr, align 8
  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
  call void @p0() #7
  ret void
}

; Worker-side wrapper: loads shared variables and forwards to __omp_outlined__19.
define internal void @__omp_outlined__19_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
  %.addr = alloca i16, align 2
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca ptr, align 8
  store i32 0, ptr %.zero.addr, align 4
  store i16 %0, ptr %.addr, align 2
  store i32 %1, ptr %.addr1, align 4
  call void @__kmpc_get_shared_variables(ptr %global_args)
  call void @__omp_outlined__19(ptr %.addr1, ptr %.zero.addr) #3
  ret void
}

; Attribute groups referenced above. #0 marks kernels/outlined bodies, #1 the
; hidden device helpers, #7 bare "convergent" call sites; the CHECK lines below
; assert how openmp-opt rewrites or propagates these.
attributes #0 = { convergent noinline norecurse nounwind "kernel" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #1 = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #2 = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #3 = { nounwind }
attributes #4 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #5 = { convergent nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #6 = { convergent nounwind }
attributes #7 = { convergent }
attributes #8 = { convergent "llvm.assume"="omp_no_openmp" }
attributes #9 = { convergent nounwind readonly willreturn }

; Offload-entry metadata: one !N per target region (name, source line, order),
; plus the module flags that mark this as an OpenMP device module.
!omp_offload.info = !{!0, !1, !2, !3, !4, !5, !6, !7}
!llvm.module.flags = !{!16, !17, !18}

!0 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
!1 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
!2 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
!3 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
!4 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
!5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
!6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
!7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
!16 = !{i32 1, !"wchar_size", i32 4}
!17 = !{i32 7, !"openmp", i32 50}
!18 = !{i32 7, !"openmp-device", i32 50}
;.
829; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 830; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 831; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 832; AMDGPU: @G = external global i32, align 4 833; AMDGPU: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 834; AMDGPU: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 835; AMDGPU: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 836; AMDGPU: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 837; AMDGPU: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 838; AMDGPU: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 839; AMDGPU: 
@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 840; AMDGPU: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 841; AMDGPU: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 842; AMDGPU: @__omp_outlined__2_wrapper.ID = private constant i8 undef 843; AMDGPU: @__omp_outlined__3_wrapper.ID = private constant i8 undef 844; AMDGPU: @__omp_outlined__5_wrapper.ID = private constant i8 undef 845; AMDGPU: @__omp_outlined__7_wrapper.ID = private constant i8 undef 846; AMDGPU: @__omp_outlined__8_wrapper.ID = private constant i8 undef 847; AMDGPU: @__omp_outlined__10_wrapper.ID = private constant i8 undef 848; AMDGPU: @__omp_outlined__11_wrapper.ID = private constant i8 undef 849; AMDGPU: @__omp_outlined__13_wrapper.ID = private constant i8 undef 850; AMDGPU: @__omp_outlined__14_wrapper.ID = private constant i8 undef 851;. 
852; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 853; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 854; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 855; NVPTX: @G = external global i32, align 4 856; NVPTX: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 857; NVPTX: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 858; NVPTX: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 859; NVPTX: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 860; NVPTX: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 861; NVPTX: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 862; NVPTX: 
@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 863; NVPTX: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 864; NVPTX: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 865; NVPTX: @__omp_outlined__2_wrapper.ID = private constant i8 undef 866; NVPTX: @__omp_outlined__3_wrapper.ID = private constant i8 undef 867; NVPTX: @__omp_outlined__5_wrapper.ID = private constant i8 undef 868; NVPTX: @__omp_outlined__7_wrapper.ID = private constant i8 undef 869; NVPTX: @__omp_outlined__8_wrapper.ID = private constant i8 undef 870; NVPTX: @__omp_outlined__10_wrapper.ID = private constant i8 undef 871; NVPTX: @__omp_outlined__11_wrapper.ID = private constant i8 undef 872; NVPTX: @__omp_outlined__13_wrapper.ID = private constant i8 undef 873; NVPTX: @__omp_outlined__14_wrapper.ID = private constant i8 undef 874;. 
875; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 876; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 877; AMDGPU-DISABLED: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 878; AMDGPU-DISABLED: @G = external global i32, align 4 879; AMDGPU-DISABLED: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 880; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 881; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 882; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 883; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 884; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 
}, ptr @[[GLOB1]], ptr null } 885; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 886; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 887; AMDGPU-DISABLED: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 888;. 889; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 890; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 891; NVPTX-DISABLED: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 892; NVPTX-DISABLED: @G = external global i32, align 4 893; NVPTX-DISABLED: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 894; NVPTX-DISABLED: @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 895; NVPTX-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 
0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 896; NVPTX-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 897; NVPTX-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 898; NVPTX-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 899; NVPTX-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 900; NVPTX-DISABLED: @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 901; NVPTX-DISABLED: @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 902;. 
903; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 904; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 905; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { 906; AMDGPU-NEXT: entry: 907; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 908; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 909; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]]) 910; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 911; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 912; AMDGPU: user_code.entry: 913; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] 914; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 915; AMDGPU-NEXT: call void @__kmpc_target_deinit() 916; AMDGPU-NEXT: ret void 917; AMDGPU: worker.exit: 918; AMDGPU-NEXT: ret void 919; 920; 921; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init 922; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { 923; AMDGPU-NEXT: ret i32 0 924; 925; 926; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 927; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ 928; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 929; AMDGPU-NEXT: entry: 930; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 931; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 932; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] 933; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] 934; AMDGPU-NEXT: ret void 935; 936; 937; AMDGPU: Function Attrs: convergent noinline nounwind 938; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized 939; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { 940; AMDGPU-NEXT: 
entry: 941; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 942; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] 943; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 944; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] 945; AMDGPU: omp_if.then: 946; AMDGPU-NEXT: store i32 0, ptr @G, align 4 947; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] 948; AMDGPU-NEXT: br label [[OMP_IF_END]] 949; AMDGPU: omp_if.end: 950; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] 951; AMDGPU-NEXT: ret void 952; 953; 954; AMDGPU: Function Attrs: convergent noinline nounwind 955; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here 956; AMDGPU-SAME: () #[[ATTR1]] { 957; AMDGPU-NEXT: entry: 958; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 959; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) 960; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 961; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] 962; AMDGPU: omp_if.then: 963; AMDGPU-NEXT: store i32 0, ptr @G, align 4 964; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) 965; AMDGPU-NEXT: br label [[OMP_IF_END]] 966; AMDGPU: omp_if.end: 967; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) 968; AMDGPU-NEXT: ret void 969; 970; 971; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 972; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 973; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 974; AMDGPU-NEXT: entry: 975; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 976; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 977; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 978; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]]) 979; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 980; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 981; AMDGPU: is_worker_check: 982; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 983; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 984; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 985; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 986; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 987; AMDGPU: worker_state_machine.begin: 988; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 989; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 990; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 991; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 992; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 993; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 994; AMDGPU: worker_state_machine.finished: 995; AMDGPU-NEXT: ret void 996; AMDGPU: worker_state_machine.is_active.check: 997; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 998; AMDGPU: worker_state_machine.parallel_region.check: 999; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__2_wrapper.ID 1000; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1001; AMDGPU: worker_state_machine.parallel_region.execute: 1002; AMDGPU-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]]) 1003; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1004; AMDGPU: worker_state_machine.parallel_region.check1: 1005; AMDGPU-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] 1006; AMDGPU: worker_state_machine.parallel_region.execute2: 1007; AMDGPU-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) 1008; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1009; AMDGPU: worker_state_machine.parallel_region.check3: 1010; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1011; AMDGPU: worker_state_machine.parallel_region.end: 1012; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() 1013; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1014; AMDGPU: worker_state_machine.done.barrier: 1015; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1016; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1017; AMDGPU: thread.user_code.check: 1018; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1019; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1020; AMDGPU: user_code.entry: 1021; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 1022; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1023; AMDGPU-NEXT: call void @__kmpc_target_deinit() 1024; AMDGPU-NEXT: ret void 1025; AMDGPU: worker.exit: 1026; AMDGPU-NEXT: ret void 1027; 1028; 1029; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1030; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 1031; AMDGPU-SAME: (ptr 
noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1032; AMDGPU-NEXT: entry: 1033; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1034; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1035; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 1036; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 1037; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1038; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] 1039; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 1040; AMDGPU-NEXT: ret void 1041; 1042; 1043; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1044; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 1045; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1046; AMDGPU-NEXT: entry: 1047; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1048; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1049; AMDGPU-NEXT: call void @p0() #[[ATTR11:[0-9]+]] 1050; AMDGPU-NEXT: ret void 1051; 1052; 1053; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1054; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper 1055; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1056; AMDGPU-NEXT: entry: 1057; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1058; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1059; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1060; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1061; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1062; AMDGPU-NEXT: call void @__omp_outlined__2(ptr 
[[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1063; AMDGPU-NEXT: ret void 1064; 1065; 1066; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1067; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 1068; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1069; AMDGPU-NEXT: entry: 1070; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1071; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1072; AMDGPU-NEXT: call void @p1() #[[ATTR11]] 1073; AMDGPU-NEXT: ret void 1074; 1075; 1076; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1077; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 1078; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1079; AMDGPU-NEXT: entry: 1080; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1081; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1082; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1083; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1084; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1085; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1086; AMDGPU-NEXT: ret void 1087; 1088; 1089; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1090; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 1091; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 1092; AMDGPU-NEXT: entry: 1093; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1094; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1095; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1096; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]]) 1097; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1098; AMDGPU-NEXT: br i1 
[[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1099; AMDGPU: is_worker_check: 1100; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1101; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1102; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1103; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1104; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1105; AMDGPU: worker_state_machine.begin: 1106; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1107; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 1108; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 1109; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1110; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1111; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1112; AMDGPU: worker_state_machine.finished: 1113; AMDGPU-NEXT: ret void 1114; AMDGPU: worker_state_machine.is_active.check: 1115; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1116; AMDGPU: worker_state_machine.parallel_region.check: 1117; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__17_wrapper 1118; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1119; AMDGPU: worker_state_machine.parallel_region.execute: 1120; AMDGPU-NEXT: 
call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]]) 1121; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1122; AMDGPU: worker_state_machine.parallel_region.check1: 1123; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__5_wrapper.ID 1124; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] 1125; AMDGPU: worker_state_machine.parallel_region.execute2: 1126; AMDGPU-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) 1127; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1128; AMDGPU: worker_state_machine.parallel_region.check3: 1129; AMDGPU-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]] 1130; AMDGPU: worker_state_machine.parallel_region.execute5: 1131; AMDGPU-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]]) 1132; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1133; AMDGPU: worker_state_machine.parallel_region.check6: 1134; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1135; AMDGPU: worker_state_machine.parallel_region.end: 1136; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() 1137; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1138; AMDGPU: worker_state_machine.done.barrier: 1139; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1140; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1141; AMDGPU: thread.user_code.check: 1142; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1143; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1144; AMDGPU: user_code.entry: 1145; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 1146; AMDGPU-NEXT: call 
void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1147; AMDGPU-NEXT: call void @__kmpc_target_deinit() 1148; AMDGPU-NEXT: ret void 1149; AMDGPU: worker.exit: 1150; AMDGPU-NEXT: ret void 1151; 1152; 1153; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1154; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 1155; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1156; AMDGPU-NEXT: entry: 1157; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1158; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1159; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 1160; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] 1161; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] 1162; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1163; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]] 1164; AMDGPU-NEXT: ret void 1165; 1166; 1167; AMDGPU: Function Attrs: noinline nounwind 1168; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized 1169; AMDGPU-SAME: () #[[ATTR6:[0-9]+]] { 1170; AMDGPU-NEXT: entry: 1171; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1172; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 1173; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1174; AMDGPU-NEXT: ret void 1175; 1176; 1177; AMDGPU: Function Attrs: convergent noinline nounwind 1178; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before 1179; AMDGPU-SAME: () #[[ATTR1]] { 
1180; AMDGPU-NEXT: entry: 1181; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1182; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 1183; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1184; AMDGPU-NEXT: ret void 1185; 1186; 1187; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1188; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 1189; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1190; AMDGPU-NEXT: entry: 1191; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1192; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1193; AMDGPU-NEXT: call void @p1() #[[ATTR11]] 1194; AMDGPU-NEXT: ret void 1195; 1196; 1197; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1198; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper 1199; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1200; AMDGPU-NEXT: entry: 1201; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1202; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1203; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1204; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1205; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1206; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1207; AMDGPU-NEXT: ret void 1208; 1209; 1210; AMDGPU: Function Attrs: noinline nounwind 1211; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized 1212; AMDGPU-SAME: () #[[ATTR6]] { 1213; AMDGPU-NEXT: entry: 1214; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1215; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 1216; AMDGPU-NEXT: call void 
@__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1217; AMDGPU-NEXT: ret void 1218; 1219; 1220; AMDGPU: Function Attrs: convergent noinline nounwind 1221; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after 1222; AMDGPU-SAME: () #[[ATTR1]] { 1223; AMDGPU-NEXT: entry: 1224; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1225; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 1226; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1227; AMDGPU-NEXT: ret void 1228; 1229; 1230; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1231; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 1232; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 1233; AMDGPU-NEXT: entry: 1234; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1235; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1236; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1237; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]]) 1238; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1239; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1240; AMDGPU: is_worker_check: 1241; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1242; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1243; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1244; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1245; 
AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1246; AMDGPU: worker_state_machine.begin: 1247; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1248; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 1249; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 1250; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1251; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1252; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1253; AMDGPU: worker_state_machine.finished: 1254; AMDGPU-NEXT: ret void 1255; AMDGPU: worker_state_machine.is_active.check: 1256; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1257; AMDGPU: worker_state_machine.parallel_region.check: 1258; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__7_wrapper.ID 1259; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1260; AMDGPU: worker_state_machine.parallel_region.execute: 1261; AMDGPU-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) 1262; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1263; AMDGPU: worker_state_machine.parallel_region.check1: 1264; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__8_wrapper.ID 1265; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 1266; AMDGPU: worker_state_machine.parallel_region.execute2: 1267; AMDGPU-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]]) 1268; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1269; AMDGPU: worker_state_machine.parallel_region.fallback.execute: 1270; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 1271; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1272; AMDGPU: worker_state_machine.parallel_region.end: 1273; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() 1274; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1275; AMDGPU: worker_state_machine.done.barrier: 1276; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1277; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1278; AMDGPU: thread.user_code.check: 1279; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1280; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1281; AMDGPU: user_code.entry: 1282; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 1283; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1284; AMDGPU-NEXT: call void @__kmpc_target_deinit() 1285; AMDGPU-NEXT: ret void 1286; AMDGPU: worker.exit: 1287; AMDGPU-NEXT: ret void 1288; 1289; 1290; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1291; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 1292; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1293; AMDGPU-NEXT: entry: 1294; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1295; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1296; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 1297; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 
undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1298; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] 1299; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 1300; AMDGPU-NEXT: ret void 1301; 1302; 1303; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1304; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 1305; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1306; AMDGPU-NEXT: entry: 1307; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1308; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1309; AMDGPU-NEXT: call void @p0() #[[ATTR11]] 1310; AMDGPU-NEXT: ret void 1311; 1312; 1313; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1314; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 1315; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1316; AMDGPU-NEXT: entry: 1317; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1318; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1319; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1320; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1321; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1322; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1323; AMDGPU-NEXT: ret void 1324; 1325; 1326; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1327; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 1328; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1329; AMDGPU-NEXT: entry: 1330; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1331; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1332; 
AMDGPU-NEXT: call void @p1() #[[ATTR11]] 1333; AMDGPU-NEXT: ret void 1334; 1335; 1336; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1337; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper 1338; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1339; AMDGPU-NEXT: entry: 1340; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1341; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1342; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1343; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1344; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1345; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1346; AMDGPU-NEXT: ret void 1347; 1348; 1349; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1350; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 1351; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 1352; AMDGPU-NEXT: entry: 1353; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1354; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1355; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1356; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]]) 1357; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1358; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1359; AMDGPU: is_worker_check: 1360; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1361; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1362; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1363; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1364; AMDGPU-NEXT: br i1 
[[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1365; AMDGPU: worker_state_machine.begin: 1366; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1367; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 1368; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 1369; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1370; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1371; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1372; AMDGPU: worker_state_machine.finished: 1373; AMDGPU-NEXT: ret void 1374; AMDGPU: worker_state_machine.is_active.check: 1375; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1376; AMDGPU: worker_state_machine.parallel_region.check: 1377; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__10_wrapper.ID 1378; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1379; AMDGPU: worker_state_machine.parallel_region.execute: 1380; AMDGPU-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]]) 1381; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1382; AMDGPU: worker_state_machine.parallel_region.check1: 1383; AMDGPU-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] 1384; AMDGPU: worker_state_machine.parallel_region.execute2: 1385; AMDGPU-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]]) 
1386; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1387; AMDGPU: worker_state_machine.parallel_region.check3: 1388; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1389; AMDGPU: worker_state_machine.parallel_region.end: 1390; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() 1391; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1392; AMDGPU: worker_state_machine.done.barrier: 1393; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1394; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1395; AMDGPU: thread.user_code.check: 1396; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1397; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1398; AMDGPU: user_code.entry: 1399; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 1400; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1401; AMDGPU-NEXT: call void @__kmpc_target_deinit() 1402; AMDGPU-NEXT: ret void 1403; AMDGPU: worker.exit: 1404; AMDGPU-NEXT: ret void 1405; 1406; 1407; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1408; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 1409; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1410; AMDGPU-NEXT: entry: 1411; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1412; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1413; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 1414; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1415; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 1416; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, 
i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 1417; AMDGPU-NEXT: ret void 1418; 1419; 1420; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1421; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10 1422; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1423; AMDGPU-NEXT: entry: 1424; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1425; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1426; AMDGPU-NEXT: call void @p0() #[[ATTR11]] 1427; AMDGPU-NEXT: ret void 1428; 1429; 1430; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1431; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper 1432; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1433; AMDGPU-NEXT: entry: 1434; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1435; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1436; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1437; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1438; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1439; AMDGPU-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1440; AMDGPU-NEXT: ret void 1441; 1442; 1443; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1444; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11 1445; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1446; AMDGPU-NEXT: entry: 1447; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1448; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1449; AMDGPU-NEXT: call void @p1() #[[ATTR11]] 1450; AMDGPU-NEXT: ret void 1451; 1452; 1453; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1454; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper 1455; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) 
#[[ATTR0]] { 1456; AMDGPU-NEXT: entry: 1457; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1458; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1459; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1460; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1461; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1462; AMDGPU-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1463; AMDGPU-NEXT: ret void 1464; 1465; 1466; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1467; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 1468; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 1469; AMDGPU-NEXT: entry: 1470; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1471; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1472; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1473; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]]) 1474; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1475; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1476; AMDGPU: is_worker_check: 1477; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1478; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1479; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1480; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1481; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1482; AMDGPU: worker_state_machine.begin: 1483; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1484; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = 
addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 1485; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 1486; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1487; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1488; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1489; AMDGPU: worker_state_machine.finished: 1490; AMDGPU-NEXT: ret void 1491; AMDGPU: worker_state_machine.is_active.check: 1492; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1493; AMDGPU: worker_state_machine.parallel_region.check: 1494; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__13_wrapper.ID 1495; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1496; AMDGPU: worker_state_machine.parallel_region.execute: 1497; AMDGPU-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]]) 1498; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1499; AMDGPU: worker_state_machine.parallel_region.check1: 1500; AMDGPU-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] 1501; AMDGPU: worker_state_machine.parallel_region.execute2: 1502; AMDGPU-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]]) 1503; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1504; AMDGPU: worker_state_machine.parallel_region.check3: 1505; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1506; AMDGPU: worker_state_machine.parallel_region.end: 1507; AMDGPU-NEXT: call void 
@__kmpc_kernel_end_parallel() 1508; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1509; AMDGPU: worker_state_machine.done.barrier: 1510; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1511; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1512; AMDGPU: thread.user_code.check: 1513; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1514; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1515; AMDGPU: user_code.entry: 1516; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 1517; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1518; AMDGPU-NEXT: call void @__kmpc_target_deinit() 1519; AMDGPU-NEXT: ret void 1520; AMDGPU: worker.exit: 1521; AMDGPU-NEXT: ret void 1522; 1523; 1524; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1525; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12 1526; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1527; AMDGPU-NEXT: entry: 1528; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1529; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1530; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 1531; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 1532; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1533; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 1534; AMDGPU-NEXT: ret void 1535; 1536; 1537; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1538; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13 1539; AMDGPU-SAME: 
(ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1540; AMDGPU-NEXT: entry: 1541; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1542; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1543; AMDGPU-NEXT: call void @p0() #[[ATTR11]] 1544; AMDGPU-NEXT: ret void 1545; 1546; 1547; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1548; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper 1549; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1550; AMDGPU-NEXT: entry: 1551; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1552; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1553; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1554; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1555; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1556; AMDGPU-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1557; AMDGPU-NEXT: ret void 1558; 1559; 1560; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1561; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14 1562; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1563; AMDGPU-NEXT: entry: 1564; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1565; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1566; AMDGPU-NEXT: call void @p1() #[[ATTR11]] 1567; AMDGPU-NEXT: ret void 1568; 1569; 1570; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1571; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper 1572; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1573; AMDGPU-NEXT: entry: 1574; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1575; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1576; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1577; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1578; AMDGPU-NEXT: 
call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1579; AMDGPU-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1580; AMDGPU-NEXT: ret void 1581; 1582; 1583; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1584; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 1585; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 1586; AMDGPU-NEXT: entry: 1587; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1588; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1589; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]]) 1590; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1591; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1592; AMDGPU: user_code.entry: 1593; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 1594; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1595; AMDGPU-NEXT: call void @__kmpc_target_deinit() 1596; AMDGPU-NEXT: ret void 1597; AMDGPU: worker.exit: 1598; AMDGPU-NEXT: ret void 1599; 1600; 1601; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1602; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15 1603; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1604; AMDGPU-NEXT: entry: 1605; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1606; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1607; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] 1608; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] 1609; AMDGPU-NEXT: ret void 1610; 1611; 1612; AMDGPU: Function 
Attrs: noinline nounwind 1613; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized 1614; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { 1615; AMDGPU-NEXT: entry: 1616; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 1617; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 1618; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 1619; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 1620; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] 1621; AMDGPU: if.then: 1622; AMDGPU-NEXT: br label [[RETURN:%.*]] 1623; AMDGPU: if.end: 1624; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 1625; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 1626; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]] 1627; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]] 1628; AMDGPU-NEXT: br label [[RETURN]] 1629; AMDGPU: return: 1630; AMDGPU-NEXT: ret void 1631; 1632; 1633; AMDGPU: Function Attrs: convergent noinline nounwind 1634; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after 1635; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { 1636; AMDGPU-NEXT: entry: 1637; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 1638; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 1639; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 1640; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 1641; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] 1642; AMDGPU: if.then: 1643; AMDGPU-NEXT: br label [[RETURN:%.*]] 1644; AMDGPU: if.end: 1645; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 1646; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 1647; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]] 1648; AMDGPU-NEXT: call void 
@simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]] 1649; AMDGPU-NEXT: br label [[RETURN]] 1650; AMDGPU: return: 1651; AMDGPU-NEXT: ret void 1652; 1653; 1654; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1655; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 1656; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 1657; AMDGPU-NEXT: entry: 1658; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1659; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1660; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1661; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]]) 1662; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1663; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1664; AMDGPU: is_worker_check: 1665; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1666; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1667; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1668; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1669; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1670; AMDGPU: worker_state_machine.begin: 1671; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1672; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 1673; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 1674; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1675; AMDGPU-NEXT: 
[[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1676; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1677; AMDGPU: worker_state_machine.finished: 1678; AMDGPU-NEXT: ret void 1679; AMDGPU: worker_state_machine.is_active.check: 1680; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1681; AMDGPU: worker_state_machine.parallel_region.fallback.execute: 1682; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 1683; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1684; AMDGPU: worker_state_machine.parallel_region.end: 1685; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() 1686; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1687; AMDGPU: worker_state_machine.done.barrier: 1688; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1689; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1690; AMDGPU: thread.user_code.check: 1691; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1692; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1693; AMDGPU: user_code.entry: 1694; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 1695; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1696; AMDGPU-NEXT: call void @__kmpc_target_deinit() 1697; AMDGPU-NEXT: ret void 1698; AMDGPU: worker.exit: 1699; AMDGPU-NEXT: ret void 1700; 1701; 1702; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1703; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16 1704; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1705; AMDGPU-NEXT: entry: 1706; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 
1707; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1708; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR9]] 1709; AMDGPU-NEXT: ret void 1710; 1711; 1712; AMDGPU: Function Attrs: convergent noinline nounwind 1713; AMDGPU-LABEL: define {{[^@]+}}@weak_callee_empty 1714; AMDGPU-SAME: () #[[ATTR1]] { 1715; AMDGPU-NEXT: entry: 1716; AMDGPU-NEXT: ret void 1717; 1718; 1719; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1720; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17 1721; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1722; AMDGPU-NEXT: entry: 1723; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1724; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1725; AMDGPU-NEXT: call void @p0() #[[ATTR11]] 1726; AMDGPU-NEXT: ret void 1727; 1728; 1729; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1730; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper 1731; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1732; AMDGPU-NEXT: entry: 1733; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1734; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1735; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1736; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1737; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1738; AMDGPU-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1739; AMDGPU-NEXT: ret void 1740; 1741; 1742; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1743; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18 1744; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1745; AMDGPU-NEXT: entry: 1746; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1747; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1748; AMDGPU-NEXT: call void @p0() #[[ATTR11]] 
1749; AMDGPU-NEXT: ret void 1750; 1751; 1752; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1753; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper 1754; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1755; AMDGPU-NEXT: entry: 1756; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1757; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1758; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1759; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1760; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1761; AMDGPU-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1762; AMDGPU-NEXT: ret void 1763; 1764; 1765; AMDGPU: Function Attrs: noinline nounwind 1766; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized 1767; AMDGPU-SAME: () #[[ATTR6]] { 1768; AMDGPU-NEXT: entry: 1769; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1770; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 1771; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1772; AMDGPU-NEXT: ret void 1773; 1774; 1775; AMDGPU: Function Attrs: convergent noinline nounwind 1776; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after 1777; AMDGPU-SAME: () #[[ATTR1]] { 1778; AMDGPU-NEXT: entry: 1779; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1780; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 1781; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1782; AMDGPU-NEXT: ret void 1783; 1784; 1785; AMDGPU: 
Function Attrs: convergent noinline norecurse nounwind 1786; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19 1787; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1788; AMDGPU-NEXT: entry: 1789; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1790; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1791; AMDGPU-NEXT: call void @p0() #[[ATTR11]] 1792; AMDGPU-NEXT: ret void 1793; 1794; 1795; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 1796; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper 1797; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1798; AMDGPU-NEXT: entry: 1799; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1800; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1801; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1802; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1803; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1804; AMDGPU-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1805; AMDGPU-NEXT: ret void 1806; 1807; 1808; NVPTX: Function Attrs: convergent noinline norecurse nounwind 1809; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 1810; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { 1811; NVPTX-NEXT: entry: 1812; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1813; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1814; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]]) 1815; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1816; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1817; NVPTX: user_code.entry: 1818; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] 1819; 
NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1820; NVPTX-NEXT: call void @__kmpc_target_deinit() 1821; NVPTX-NEXT: ret void 1822; NVPTX: worker.exit: 1823; NVPTX-NEXT: ret void 1824; 1825; 1826; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init 1827; NVPTX-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { 1828; NVPTX-NEXT: ret i32 0 1829; 1830; 1831; NVPTX: Function Attrs: convergent noinline norecurse nounwind 1832; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ 1833; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1834; NVPTX-NEXT: entry: 1835; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1836; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1837; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] 1838; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] 1839; NVPTX-NEXT: ret void 1840; 1841; 1842; NVPTX: Function Attrs: convergent noinline nounwind 1843; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized 1844; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { 1845; NVPTX-NEXT: entry: 1846; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 1847; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] 1848; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 1849; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] 1850; NVPTX: omp_if.then: 1851; NVPTX-NEXT: store i32 0, ptr @G, align 4 1852; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] 1853; NVPTX-NEXT: br label [[OMP_IF_END]] 1854; NVPTX: omp_if.end: 1855; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] 1856; NVPTX-NEXT: ret void 1857; 1858; 1859; NVPTX: Function Attrs: convergent noinline nounwind 1860; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here 1861; 
NVPTX-SAME: () #[[ATTR1]] { 1862; NVPTX-NEXT: entry: 1863; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 1864; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) 1865; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 1866; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] 1867; NVPTX: omp_if.then: 1868; NVPTX-NEXT: store i32 0, ptr @G, align 4 1869; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) 1870; NVPTX-NEXT: br label [[OMP_IF_END]] 1871; NVPTX: omp_if.end: 1872; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) 1873; NVPTX-NEXT: ret void 1874; 1875; 1876; NVPTX: Function Attrs: convergent noinline norecurse nounwind 1877; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 1878; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 1879; NVPTX-NEXT: entry: 1880; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 1881; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1882; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1883; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]]) 1884; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1885; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1886; NVPTX: is_worker_check: 1887; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1888; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1889; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1890; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1891; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1892; NVPTX: 
worker_state_machine.begin: 1893; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1894; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 1895; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 1896; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1897; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1898; NVPTX: worker_state_machine.finished: 1899; NVPTX-NEXT: ret void 1900; NVPTX: worker_state_machine.is_active.check: 1901; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1902; NVPTX: worker_state_machine.parallel_region.check: 1903; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__2_wrapper.ID 1904; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1905; NVPTX: worker_state_machine.parallel_region.execute: 1906; NVPTX-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]]) 1907; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1908; NVPTX: worker_state_machine.parallel_region.check1: 1909; NVPTX-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] 1910; NVPTX: worker_state_machine.parallel_region.execute2: 1911; NVPTX-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) 1912; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1913; NVPTX: worker_state_machine.parallel_region.check3: 1914; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1915; NVPTX: worker_state_machine.parallel_region.end: 1916; NVPTX-NEXT: call void 
@__kmpc_kernel_end_parallel() 1917; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1918; NVPTX: worker_state_machine.done.barrier: 1919; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1920; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1921; NVPTX: thread.user_code.check: 1922; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1923; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1924; NVPTX: user_code.entry: 1925; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 1926; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1927; NVPTX-NEXT: call void @__kmpc_target_deinit() 1928; NVPTX-NEXT: ret void 1929; NVPTX: worker.exit: 1930; NVPTX-NEXT: ret void 1931; 1932; 1933; NVPTX: Function Attrs: convergent noinline norecurse nounwind 1934; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 1935; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1936; NVPTX-NEXT: entry: 1937; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1938; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1939; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 1940; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 1941; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1942; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] 1943; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 1944; NVPTX-NEXT: ret void 1945; 1946; 1947; NVPTX: Function Attrs: convergent noinline norecurse nounwind 1948; NVPTX-LABEL: 
define {{[^@]+}}@__omp_outlined__2 1949; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1950; NVPTX-NEXT: entry: 1951; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1952; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1953; NVPTX-NEXT: call void @p0() #[[ATTR11:[0-9]+]] 1954; NVPTX-NEXT: ret void 1955; 1956; 1957; NVPTX: Function Attrs: convergent noinline norecurse nounwind 1958; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper 1959; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1960; NVPTX-NEXT: entry: 1961; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1962; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1963; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1964; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1965; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1966; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1967; NVPTX-NEXT: ret void 1968; 1969; 1970; NVPTX: Function Attrs: convergent noinline norecurse nounwind 1971; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 1972; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1973; NVPTX-NEXT: entry: 1974; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1975; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1976; NVPTX-NEXT: call void @p1() #[[ATTR11]] 1977; NVPTX-NEXT: ret void 1978; 1979; 1980; NVPTX: Function Attrs: convergent noinline norecurse nounwind 1981; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 1982; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 1983; NVPTX-NEXT: entry: 1984; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1985; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1986; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1987; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, 
align 8 1988; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1989; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 1990; NVPTX-NEXT: ret void 1991; 1992; 1993; NVPTX: Function Attrs: convergent noinline norecurse nounwind 1994; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 1995; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 1996; NVPTX-NEXT: entry: 1997; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 1998; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1999; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2000; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]]) 2001; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2002; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2003; NVPTX: is_worker_check: 2004; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2005; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2006; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2007; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2008; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2009; NVPTX: worker_state_machine.begin: 2010; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2011; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 2012; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 2013; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2014; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label 
[[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2015; NVPTX: worker_state_machine.finished: 2016; NVPTX-NEXT: ret void 2017; NVPTX: worker_state_machine.is_active.check: 2018; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2019; NVPTX: worker_state_machine.parallel_region.check: 2020; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__17_wrapper 2021; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 2022; NVPTX: worker_state_machine.parallel_region.execute: 2023; NVPTX-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]]) 2024; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2025; NVPTX: worker_state_machine.parallel_region.check1: 2026; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__5_wrapper.ID 2027; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] 2028; NVPTX: worker_state_machine.parallel_region.execute2: 2029; NVPTX-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) 2030; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2031; NVPTX: worker_state_machine.parallel_region.check3: 2032; NVPTX-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]] 2033; NVPTX: worker_state_machine.parallel_region.execute5: 2034; NVPTX-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]]) 2035; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2036; NVPTX: worker_state_machine.parallel_region.check6: 2037; NVPTX-NEXT: br label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2038; NVPTX: worker_state_machine.parallel_region.end: 2039; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() 2040; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2041; NVPTX: worker_state_machine.done.barrier: 2042; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2043; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2044; NVPTX: thread.user_code.check: 2045; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2046; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 2047; NVPTX: user_code.entry: 2048; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 2049; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2050; NVPTX-NEXT: call void @__kmpc_target_deinit() 2051; NVPTX-NEXT: ret void 2052; NVPTX: worker.exit: 2053; NVPTX-NEXT: ret void 2054; 2055; 2056; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2057; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 2058; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2059; NVPTX-NEXT: entry: 2060; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2061; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2062; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 2063; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] 2064; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] 2065; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2066; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]] 2067; NVPTX-NEXT: ret void 2068; 2069; 2070; NVPTX: Function Attrs: 
noinline nounwind 2071; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized 2072; NVPTX-SAME: () #[[ATTR6:[0-9]+]] { 2073; NVPTX-NEXT: entry: 2074; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2075; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 2076; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2077; NVPTX-NEXT: ret void 2078; 2079; 2080; NVPTX: Function Attrs: convergent noinline nounwind 2081; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before 2082; NVPTX-SAME: () #[[ATTR1]] { 2083; NVPTX-NEXT: entry: 2084; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2085; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 2086; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2087; NVPTX-NEXT: ret void 2088; 2089; 2090; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2091; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 2092; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2093; NVPTX-NEXT: entry: 2094; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2095; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2096; NVPTX-NEXT: call void @p1() #[[ATTR11]] 2097; NVPTX-NEXT: ret void 2098; 2099; 2100; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2101; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper 2102; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2103; NVPTX-NEXT: entry: 2104; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2105; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2106; 
NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2107; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2108; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2109; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2110; NVPTX-NEXT: ret void 2111; 2112; 2113; NVPTX: Function Attrs: noinline nounwind 2114; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized 2115; NVPTX-SAME: () #[[ATTR6]] { 2116; NVPTX-NEXT: entry: 2117; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2118; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 2119; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2120; NVPTX-NEXT: ret void 2121; 2122; 2123; NVPTX: Function Attrs: convergent noinline nounwind 2124; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after 2125; NVPTX-SAME: () #[[ATTR1]] { 2126; NVPTX-NEXT: entry: 2127; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2128; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 2129; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2130; NVPTX-NEXT: ret void 2131; 2132; 2133; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2134; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 2135; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 2136; NVPTX-NEXT: entry: 2137; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 2138; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2139; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2140; NVPTX-NEXT: [[TMP0:%.*]] = call 
i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]]) 2141; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2142; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2143; NVPTX: is_worker_check: 2144; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2145; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2146; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2147; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2148; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2149; NVPTX: worker_state_machine.begin: 2150; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2151; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 2152; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 2153; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2154; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2155; NVPTX: worker_state_machine.finished: 2156; NVPTX-NEXT: ret void 2157; NVPTX: worker_state_machine.is_active.check: 2158; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2159; NVPTX: worker_state_machine.parallel_region.check: 2160; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__7_wrapper.ID 2161; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 2162; 
NVPTX: worker_state_machine.parallel_region.execute: 2163; NVPTX-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) 2164; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2165; NVPTX: worker_state_machine.parallel_region.check1: 2166; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__8_wrapper.ID 2167; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2168; NVPTX: worker_state_machine.parallel_region.execute2: 2169; NVPTX-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]]) 2170; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2171; NVPTX: worker_state_machine.parallel_region.fallback.execute: 2172; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2173; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2174; NVPTX: worker_state_machine.parallel_region.end: 2175; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() 2176; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2177; NVPTX: worker_state_machine.done.barrier: 2178; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2179; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2180; NVPTX: thread.user_code.check: 2181; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2182; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 2183; NVPTX: user_code.entry: 2184; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 2185; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2186; NVPTX-NEXT: call void @__kmpc_target_deinit() 2187; NVPTX-NEXT: ret void 2188; NVPTX: worker.exit: 2189; NVPTX-NEXT: ret void 2190; 2191; 2192; NVPTX: Function Attrs: convergent noinline 
norecurse nounwind 2193; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 2194; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2195; NVPTX-NEXT: entry: 2196; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2197; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2198; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 2199; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2200; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] 2201; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 2202; NVPTX-NEXT: ret void 2203; 2204; 2205; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2206; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 2207; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2208; NVPTX-NEXT: entry: 2209; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2210; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2211; NVPTX-NEXT: call void @p0() #[[ATTR11]] 2212; NVPTX-NEXT: ret void 2213; 2214; 2215; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2216; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 2217; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2218; NVPTX-NEXT: entry: 2219; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2220; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2221; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2222; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2223; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2224; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr 
[[DOTZERO_ADDR]]) #[[ATTR3]] 2225; NVPTX-NEXT: ret void 2226; 2227; 2228; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2229; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 2230; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2231; NVPTX-NEXT: entry: 2232; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2233; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2234; NVPTX-NEXT: call void @p1() #[[ATTR11]] 2235; NVPTX-NEXT: ret void 2236; 2237; 2238; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2239; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper 2240; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2241; NVPTX-NEXT: entry: 2242; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2243; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2244; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2245; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2246; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2247; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2248; NVPTX-NEXT: ret void 2249; 2250; 2251; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2252; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 2253; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 2254; NVPTX-NEXT: entry: 2255; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 2256; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2257; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2258; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]]) 2259; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2260; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label 
[[THREAD_USER_CODE_CHECK:%.*]] 2261; NVPTX: is_worker_check: 2262; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2263; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2264; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2265; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2266; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2267; NVPTX: worker_state_machine.begin: 2268; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2269; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 2270; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 2271; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2272; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2273; NVPTX: worker_state_machine.finished: 2274; NVPTX-NEXT: ret void 2275; NVPTX: worker_state_machine.is_active.check: 2276; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2277; NVPTX: worker_state_machine.parallel_region.check: 2278; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__10_wrapper.ID 2279; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 2280; NVPTX: worker_state_machine.parallel_region.execute: 2281; NVPTX-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]]) 2282; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2283; NVPTX: worker_state_machine.parallel_region.check1: 2284; NVPTX-NEXT: br 
i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] 2285; NVPTX: worker_state_machine.parallel_region.execute2: 2286; NVPTX-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]]) 2287; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2288; NVPTX: worker_state_machine.parallel_region.check3: 2289; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2290; NVPTX: worker_state_machine.parallel_region.end: 2291; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() 2292; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2293; NVPTX: worker_state_machine.done.barrier: 2294; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2295; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2296; NVPTX: thread.user_code.check: 2297; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2298; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 2299; NVPTX: user_code.entry: 2300; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 2301; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2302; NVPTX-NEXT: call void @__kmpc_target_deinit() 2303; NVPTX-NEXT: ret void 2304; NVPTX: worker.exit: 2305; NVPTX-NEXT: ret void 2306; 2307; 2308; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2309; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 2310; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2311; NVPTX-NEXT: entry: 2312; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2313; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2314; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 2315; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr 
@__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2316; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 2317; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 2318; NVPTX-NEXT: ret void 2319; 2320; 2321; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2322; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10 2323; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2324; NVPTX-NEXT: entry: 2325; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2326; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2327; NVPTX-NEXT: call void @p0() #[[ATTR11]] 2328; NVPTX-NEXT: ret void 2329; 2330; 2331; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2332; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper 2333; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2334; NVPTX-NEXT: entry: 2335; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2336; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2337; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2338; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2339; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2340; NVPTX-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2341; NVPTX-NEXT: ret void 2342; 2343; 2344; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2345; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11 2346; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2347; NVPTX-NEXT: entry: 2348; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2349; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2350; NVPTX-NEXT: call void @p1() #[[ATTR11]] 2351; NVPTX-NEXT: ret 
void 2352; 2353; 2354; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2355; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper 2356; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2357; NVPTX-NEXT: entry: 2358; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2359; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2360; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2361; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2362; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2363; NVPTX-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2364; NVPTX-NEXT: ret void 2365; 2366; 2367; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2368; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 2369; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 2370; NVPTX-NEXT: entry: 2371; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 2372; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2373; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2374; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]]) 2375; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2376; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2377; NVPTX: is_worker_check: 2378; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2379; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2380; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2381; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2382; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2383; NVPTX: 
worker_state_machine.begin: 2384; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2385; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 2386; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 2387; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2388; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2389; NVPTX: worker_state_machine.finished: 2390; NVPTX-NEXT: ret void 2391; NVPTX: worker_state_machine.is_active.check: 2392; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2393; NVPTX: worker_state_machine.parallel_region.check: 2394; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__13_wrapper.ID 2395; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 2396; NVPTX: worker_state_machine.parallel_region.execute: 2397; NVPTX-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]]) 2398; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2399; NVPTX: worker_state_machine.parallel_region.check1: 2400; NVPTX-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] 2401; NVPTX: worker_state_machine.parallel_region.execute2: 2402; NVPTX-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]]) 2403; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2404; NVPTX: worker_state_machine.parallel_region.check3: 2405; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2406; NVPTX: worker_state_machine.parallel_region.end: 2407; NVPTX-NEXT: call void 
@__kmpc_kernel_end_parallel() 2408; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2409; NVPTX: worker_state_machine.done.barrier: 2410; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2411; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2412; NVPTX: thread.user_code.check: 2413; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2414; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 2415; NVPTX: user_code.entry: 2416; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 2417; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2418; NVPTX-NEXT: call void @__kmpc_target_deinit() 2419; NVPTX-NEXT: ret void 2420; NVPTX: worker.exit: 2421; NVPTX-NEXT: ret void 2422; 2423; 2424; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2425; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12 2426; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2427; NVPTX-NEXT: entry: 2428; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2429; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2430; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 2431; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 2432; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2433; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 2434; NVPTX-NEXT: ret void 2435; 2436; 2437; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2438; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13 2439; NVPTX-SAME: (ptr noalias 
[[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2440; NVPTX-NEXT: entry: 2441; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2442; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2443; NVPTX-NEXT: call void @p0() #[[ATTR11]] 2444; NVPTX-NEXT: ret void 2445; 2446; 2447; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2448; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper 2449; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2450; NVPTX-NEXT: entry: 2451; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2452; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2453; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2454; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2455; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2456; NVPTX-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2457; NVPTX-NEXT: ret void 2458; 2459; 2460; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2461; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14 2462; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2463; NVPTX-NEXT: entry: 2464; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2465; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2466; NVPTX-NEXT: call void @p1() #[[ATTR11]] 2467; NVPTX-NEXT: ret void 2468; 2469; 2470; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2471; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper 2472; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2473; NVPTX-NEXT: entry: 2474; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2475; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2476; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2477; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2478; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr 
[[GLOBAL_ARGS]]) 2479; NVPTX-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2480; NVPTX-NEXT: ret void 2481; 2482; 2483; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2484; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 2485; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 2486; NVPTX-NEXT: entry: 2487; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2488; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2489; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]]) 2490; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2491; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 2492; NVPTX: user_code.entry: 2493; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 2494; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2495; NVPTX-NEXT: call void @__kmpc_target_deinit() 2496; NVPTX-NEXT: ret void 2497; NVPTX: worker.exit: 2498; NVPTX-NEXT: ret void 2499; 2500; 2501; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2502; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15 2503; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2504; NVPTX-NEXT: entry: 2505; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2506; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2507; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] 2508; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] 2509; NVPTX-NEXT: ret void 2510; 2511; 2512; NVPTX: Function Attrs: noinline nounwind 2513; NVPTX-LABEL: define 
{{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized 2514; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { 2515; NVPTX-NEXT: entry: 2516; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 2517; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 2518; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 2519; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 2520; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] 2521; NVPTX: if.then: 2522; NVPTX-NEXT: br label [[RETURN:%.*]] 2523; NVPTX: if.end: 2524; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 2525; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 2526; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]] 2527; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]] 2528; NVPTX-NEXT: br label [[RETURN]] 2529; NVPTX: return: 2530; NVPTX-NEXT: ret void 2531; 2532; 2533; NVPTX: Function Attrs: convergent noinline nounwind 2534; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after 2535; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { 2536; NVPTX-NEXT: entry: 2537; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 2538; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 2539; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 2540; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 2541; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] 2542; NVPTX: if.then: 2543; NVPTX-NEXT: br label [[RETURN:%.*]] 2544; NVPTX: if.end: 2545; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 2546; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 2547; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]] 2548; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]] 2549; 
NVPTX-NEXT: br label [[RETURN]] 2550; NVPTX: return: 2551; NVPTX-NEXT: ret void 2552; 2553; 2554; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2555; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 2556; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 2557; NVPTX-NEXT: entry: 2558; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 2559; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2560; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2561; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]]) 2562; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2563; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2564; NVPTX: is_worker_check: 2565; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2566; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2567; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2568; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2569; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2570; NVPTX: worker_state_machine.begin: 2571; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2572; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 2573; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 2574; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2575; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2576; NVPTX: worker_state_machine.finished: 2577; NVPTX-NEXT: ret void 2578; 
NVPTX: worker_state_machine.is_active.check: 2579; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2580; NVPTX: worker_state_machine.parallel_region.fallback.execute: 2581; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2582; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2583; NVPTX: worker_state_machine.parallel_region.end: 2584; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() 2585; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2586; NVPTX: worker_state_machine.done.barrier: 2587; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2588; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2589; NVPTX: thread.user_code.check: 2590; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2591; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 2592; NVPTX: user_code.entry: 2593; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 2594; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2595; NVPTX-NEXT: call void @__kmpc_target_deinit() 2596; NVPTX-NEXT: ret void 2597; NVPTX: worker.exit: 2598; NVPTX-NEXT: ret void 2599; 2600; 2601; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2602; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16 2603; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2604; NVPTX-NEXT: entry: 2605; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2606; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2607; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR9]] 2608; NVPTX-NEXT: ret void 2609; 2610; 2611; NVPTX: Function Attrs: convergent noinline nounwind 2612; NVPTX-LABEL: define {{[^@]+}}@weak_callee_empty 2613; NVPTX-SAME: () 
#[[ATTR1]] { 2614; NVPTX-NEXT: entry: 2615; NVPTX-NEXT: ret void 2616; 2617; 2618; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2619; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17 2620; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2621; NVPTX-NEXT: entry: 2622; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2623; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2624; NVPTX-NEXT: call void @p0() #[[ATTR11]] 2625; NVPTX-NEXT: ret void 2626; 2627; 2628; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2629; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper 2630; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2631; NVPTX-NEXT: entry: 2632; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2633; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2634; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2635; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2636; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2637; NVPTX-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2638; NVPTX-NEXT: ret void 2639; 2640; 2641; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2642; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18 2643; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2644; NVPTX-NEXT: entry: 2645; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2646; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2647; NVPTX-NEXT: call void @p0() #[[ATTR11]] 2648; NVPTX-NEXT: ret void 2649; 2650; 2651; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2652; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper 2653; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2654; NVPTX-NEXT: entry: 2655; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2656; 
NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2657; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2658; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2659; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2660; NVPTX-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2661; NVPTX-NEXT: ret void 2662; 2663; 2664; NVPTX: Function Attrs: noinline nounwind 2665; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized 2666; NVPTX-SAME: () #[[ATTR6]] { 2667; NVPTX-NEXT: entry: 2668; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2669; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 2670; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2671; NVPTX-NEXT: ret void 2672; 2673; 2674; NVPTX: Function Attrs: convergent noinline nounwind 2675; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after 2676; NVPTX-SAME: () #[[ATTR1]] { 2677; NVPTX-NEXT: entry: 2678; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2679; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 2680; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2681; NVPTX-NEXT: ret void 2682; 2683; 2684; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2685; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19 2686; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2687; NVPTX-NEXT: entry: 2688; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2689; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca 
ptr, align 8 2690; NVPTX-NEXT: call void @p0() #[[ATTR11]] 2691; NVPTX-NEXT: ret void 2692; 2693; 2694; NVPTX: Function Attrs: convergent noinline norecurse nounwind 2695; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper 2696; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2697; NVPTX-NEXT: entry: 2698; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2699; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2700; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2701; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2702; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2703; NVPTX-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2704; NVPTX-NEXT: ret void 2705; 2706; 2707; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2708; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 2709; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { 2710; AMDGPU-DISABLED-NEXT: entry: 2711; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2712; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2713; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]]) 2714; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2715; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 2716; AMDGPU-DISABLED: user_code.entry: 2717; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] 2718; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2719; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() 2720; AMDGPU-DISABLED-NEXT: ret void 2721; AMDGPU-DISABLED: worker.exit: 2722; AMDGPU-DISABLED-NEXT: ret 
void 2723; 2724; 2725; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init 2726; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { 2727; AMDGPU-DISABLED-NEXT: ret i32 0 2728; 2729; 2730; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2731; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ 2732; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2733; AMDGPU-DISABLED-NEXT: entry: 2734; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2735; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2736; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] 2737; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] 2738; AMDGPU-DISABLED-NEXT: ret void 2739; 2740; 2741; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind 2742; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized 2743; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { 2744; AMDGPU-DISABLED-NEXT: entry: 2745; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 2746; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] 2747; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 2748; AMDGPU-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] 2749; AMDGPU-DISABLED: omp_if.then: 2750; AMDGPU-DISABLED-NEXT: store i32 0, ptr @G, align 4 2751; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] 2752; AMDGPU-DISABLED-NEXT: br label [[OMP_IF_END]] 2753; AMDGPU-DISABLED: omp_if.end: 2754; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] 2755; AMDGPU-DISABLED-NEXT: ret void 2756; 2757; 2758; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind 2759; 
AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here 2760; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { 2761; AMDGPU-DISABLED-NEXT: entry: 2762; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 2763; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) 2764; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 2765; AMDGPU-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] 2766; AMDGPU-DISABLED: omp_if.then: 2767; AMDGPU-DISABLED-NEXT: store i32 0, ptr @G, align 4 2768; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) 2769; AMDGPU-DISABLED-NEXT: br label [[OMP_IF_END]] 2770; AMDGPU-DISABLED: omp_if.end: 2771; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) 2772; AMDGPU-DISABLED-NEXT: ret void 2773; 2774; 2775; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2776; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 2777; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 2778; AMDGPU-DISABLED-NEXT: entry: 2779; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2780; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2781; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]]) 2782; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2783; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 2784; AMDGPU-DISABLED: user_code.entry: 2785; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 2786; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2787; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() 2788; 
AMDGPU-DISABLED-NEXT: ret void 2789; AMDGPU-DISABLED: worker.exit: 2790; AMDGPU-DISABLED-NEXT: ret void 2791; 2792; 2793; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2794; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 2795; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2796; AMDGPU-DISABLED-NEXT: entry: 2797; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2798; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2799; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 2800; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 2801; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2802; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] 2803; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 2804; AMDGPU-DISABLED-NEXT: ret void 2805; 2806; 2807; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2808; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 2809; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2810; AMDGPU-DISABLED-NEXT: entry: 2811; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2812; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2813; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11:[0-9]+]] 2814; AMDGPU-DISABLED-NEXT: ret void 2815; 2816; 2817; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2818; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper 2819; AMDGPU-DISABLED-SAME: 
(i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2820; AMDGPU-DISABLED-NEXT: entry: 2821; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2822; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2823; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2824; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2825; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2826; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2827; AMDGPU-DISABLED-NEXT: ret void 2828; 2829; 2830; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2831; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 2832; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2833; AMDGPU-DISABLED-NEXT: entry: 2834; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2835; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2836; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]] 2837; AMDGPU-DISABLED-NEXT: ret void 2838; 2839; 2840; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2841; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 2842; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2843; AMDGPU-DISABLED-NEXT: entry: 2844; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2845; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2846; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2847; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2848; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2849; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2850; AMDGPU-DISABLED-NEXT: ret void 2851; 2852; 2853; AMDGPU-DISABLED: Function Attrs: 
convergent noinline norecurse nounwind 2854; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 2855; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 2856; AMDGPU-DISABLED-NEXT: entry: 2857; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2858; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2859; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]]) 2860; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2861; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 2862; AMDGPU-DISABLED: user_code.entry: 2863; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 2864; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2865; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() 2866; AMDGPU-DISABLED-NEXT: ret void 2867; AMDGPU-DISABLED: worker.exit: 2868; AMDGPU-DISABLED-NEXT: ret void 2869; 2870; 2871; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2872; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 2873; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2874; AMDGPU-DISABLED-NEXT: entry: 2875; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2876; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2877; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 2878; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] 2879; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] 2880; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr 
@[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2881; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]] 2882; AMDGPU-DISABLED-NEXT: ret void 2883; 2884; 2885; AMDGPU-DISABLED: Function Attrs: noinline nounwind 2886; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized 2887; AMDGPU-DISABLED-SAME: () #[[ATTR6:[0-9]+]] { 2888; AMDGPU-DISABLED-NEXT: entry: 2889; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2890; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 2891; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2892; AMDGPU-DISABLED-NEXT: ret void 2893; 2894; 2895; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind 2896; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before 2897; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { 2898; AMDGPU-DISABLED-NEXT: entry: 2899; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2900; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 2901; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2902; AMDGPU-DISABLED-NEXT: ret void 2903; 2904; 2905; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2906; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 2907; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2908; AMDGPU-DISABLED-NEXT: entry: 2909; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 8 2910; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2911; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]] 2912; AMDGPU-DISABLED-NEXT: ret void 2913; 2914; 2915; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2916; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper 2917; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2918; AMDGPU-DISABLED-NEXT: entry: 2919; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2920; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2921; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2922; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2923; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2924; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2925; AMDGPU-DISABLED-NEXT: ret void 2926; 2927; 2928; AMDGPU-DISABLED: Function Attrs: noinline nounwind 2929; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized 2930; AMDGPU-DISABLED-SAME: () #[[ATTR6]] { 2931; AMDGPU-DISABLED-NEXT: entry: 2932; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2933; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 2934; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2935; AMDGPU-DISABLED-NEXT: ret void 2936; 2937; 2938; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind 2939; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after 2940; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { 2941; AMDGPU-DISABLED-NEXT: entry: 2942; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 
2943; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 2944; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2945; AMDGPU-DISABLED-NEXT: ret void 2946; 2947; 2948; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2949; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 2950; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 2951; AMDGPU-DISABLED-NEXT: entry: 2952; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2953; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2954; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]]) 2955; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2956; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 2957; AMDGPU-DISABLED: user_code.entry: 2958; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 2959; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2960; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() 2961; AMDGPU-DISABLED-NEXT: ret void 2962; AMDGPU-DISABLED: worker.exit: 2963; AMDGPU-DISABLED-NEXT: ret void 2964; 2965; 2966; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2967; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 2968; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2969; AMDGPU-DISABLED-NEXT: entry: 2970; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2971; AMDGPU-DISABLED-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2972; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 2973; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2974; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] 2975; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 2976; AMDGPU-DISABLED-NEXT: ret void 2977; 2978; 2979; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2980; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 2981; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2982; AMDGPU-DISABLED-NEXT: entry: 2983; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2984; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2985; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]] 2986; AMDGPU-DISABLED-NEXT: ret void 2987; 2988; 2989; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 2990; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 2991; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 2992; AMDGPU-DISABLED-NEXT: entry: 2993; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2994; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2995; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2996; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2997; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2998; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 2999; AMDGPU-DISABLED-NEXT: ret void 3000; 
3001; 3002; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3003; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 3004; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3005; AMDGPU-DISABLED-NEXT: entry: 3006; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3007; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3008; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]] 3009; AMDGPU-DISABLED-NEXT: ret void 3010; 3011; 3012; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3013; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper 3014; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3015; AMDGPU-DISABLED-NEXT: entry: 3016; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3017; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3018; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3019; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3020; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3021; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3022; AMDGPU-DISABLED-NEXT: ret void 3023; 3024; 3025; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3026; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 3027; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 3028; AMDGPU-DISABLED-NEXT: entry: 3029; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3030; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 3031; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]]) 3032; AMDGPU-DISABLED-NEXT: 
[[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3033; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3034; AMDGPU-DISABLED: user_code.entry: 3035; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 3036; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3037; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() 3038; AMDGPU-DISABLED-NEXT: ret void 3039; AMDGPU-DISABLED: worker.exit: 3040; AMDGPU-DISABLED-NEXT: ret void 3041; 3042; 3043; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3044; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 3045; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3046; AMDGPU-DISABLED-NEXT: entry: 3047; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3048; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3049; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 3050; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3051; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 3052; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 3053; AMDGPU-DISABLED-NEXT: ret void 3054; 3055; 3056; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3057; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10 3058; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3059; AMDGPU-DISABLED-NEXT: entry: 3060; AMDGPU-DISABLED-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3061; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3062; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]] 3063; AMDGPU-DISABLED-NEXT: ret void 3064; 3065; 3066; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3067; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper 3068; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3069; AMDGPU-DISABLED-NEXT: entry: 3070; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3071; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3072; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3073; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3074; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3075; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3076; AMDGPU-DISABLED-NEXT: ret void 3077; 3078; 3079; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3080; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11 3081; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3082; AMDGPU-DISABLED-NEXT: entry: 3083; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3084; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3085; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]] 3086; AMDGPU-DISABLED-NEXT: ret void 3087; 3088; 3089; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3090; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper 3091; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3092; AMDGPU-DISABLED-NEXT: entry: 3093; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3094; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3095; 
AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3096; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3097; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3098; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3099; AMDGPU-DISABLED-NEXT: ret void 3100; 3101; 3102; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3103; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 3104; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 3105; AMDGPU-DISABLED-NEXT: entry: 3106; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3107; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 3108; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]]) 3109; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3110; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3111; AMDGPU-DISABLED: user_code.entry: 3112; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 3113; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3114; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() 3115; AMDGPU-DISABLED-NEXT: ret void 3116; AMDGPU-DISABLED: worker.exit: 3117; AMDGPU-DISABLED-NEXT: ret void 3118; 3119; 3120; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3121; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12 3122; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3123; AMDGPU-DISABLED-NEXT: entry: 3124; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3125; 
AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3126; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 3127; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 3128; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3129; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 3130; AMDGPU-DISABLED-NEXT: ret void 3131; 3132; 3133; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3134; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13 3135; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3136; AMDGPU-DISABLED-NEXT: entry: 3137; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3138; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3139; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]] 3140; AMDGPU-DISABLED-NEXT: ret void 3141; 3142; 3143; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3144; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper 3145; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3146; AMDGPU-DISABLED-NEXT: entry: 3147; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3148; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3149; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3150; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3151; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3152; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3153; 
AMDGPU-DISABLED-NEXT: ret void 3154; 3155; 3156; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3157; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14 3158; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3159; AMDGPU-DISABLED-NEXT: entry: 3160; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3161; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3162; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]] 3163; AMDGPU-DISABLED-NEXT: ret void 3164; 3165; 3166; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3167; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper 3168; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3169; AMDGPU-DISABLED-NEXT: entry: 3170; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3171; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3172; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3173; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3174; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3175; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3176; AMDGPU-DISABLED-NEXT: ret void 3177; 3178; 3179; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3180; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 3181; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 3182; AMDGPU-DISABLED-NEXT: entry: 3183; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3184; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 3185; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr 
@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]]) 3186; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3187; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3188; AMDGPU-DISABLED: user_code.entry: 3189; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 3190; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3191; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() 3192; AMDGPU-DISABLED-NEXT: ret void 3193; AMDGPU-DISABLED: worker.exit: 3194; AMDGPU-DISABLED-NEXT: ret void 3195; 3196; 3197; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3198; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15 3199; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3200; AMDGPU-DISABLED-NEXT: entry: 3201; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3202; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3203; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] 3204; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] 3205; AMDGPU-DISABLED-NEXT: ret void 3206; 3207; 3208; AMDGPU-DISABLED: Function Attrs: noinline nounwind 3209; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized 3210; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { 3211; AMDGPU-DISABLED-NEXT: entry: 3212; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 3213; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 3214; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 3215; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp 
eq i32 [[TMP0]], 0 3216; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] 3217; AMDGPU-DISABLED: if.then: 3218; AMDGPU-DISABLED-NEXT: br label [[RETURN:%.*]] 3219; AMDGPU-DISABLED: if.end: 3220; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 3221; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 3222; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]] 3223; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]] 3224; AMDGPU-DISABLED-NEXT: br label [[RETURN]] 3225; AMDGPU-DISABLED: return: 3226; AMDGPU-DISABLED-NEXT: ret void 3227; 3228; 3229; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind 3230; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after 3231; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { 3232; AMDGPU-DISABLED-NEXT: entry: 3233; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 3234; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 3235; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 3236; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 3237; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] 3238; AMDGPU-DISABLED: if.then: 3239; AMDGPU-DISABLED-NEXT: br label [[RETURN:%.*]] 3240; AMDGPU-DISABLED: if.end: 3241; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 3242; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 3243; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]] 3244; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]] 3245; AMDGPU-DISABLED-NEXT: br label [[RETURN]] 3246; AMDGPU-DISABLED: return: 3247; AMDGPU-DISABLED-NEXT: 
ret void 3248; 3249; 3250; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3251; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 3252; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 3253; AMDGPU-DISABLED-NEXT: entry: 3254; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3255; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 3256; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]]) 3257; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3258; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3259; AMDGPU-DISABLED: user_code.entry: 3260; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 3261; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3262; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() 3263; AMDGPU-DISABLED-NEXT: ret void 3264; AMDGPU-DISABLED: worker.exit: 3265; AMDGPU-DISABLED-NEXT: ret void 3266; 3267; 3268; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3269; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16 3270; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3271; AMDGPU-DISABLED-NEXT: entry: 3272; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3273; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3274; AMDGPU-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR9]] 3275; AMDGPU-DISABLED-NEXT: ret void 3276; 3277; 3278; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind 3279; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty 3280; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { 
3281; AMDGPU-DISABLED-NEXT: entry: 3282; AMDGPU-DISABLED-NEXT: ret void 3283; 3284; 3285; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3286; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17 3287; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3288; AMDGPU-DISABLED-NEXT: entry: 3289; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3290; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3291; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]] 3292; AMDGPU-DISABLED-NEXT: ret void 3293; 3294; 3295; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3296; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper 3297; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3298; AMDGPU-DISABLED-NEXT: entry: 3299; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3300; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3301; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3302; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3303; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3304; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3305; AMDGPU-DISABLED-NEXT: ret void 3306; 3307; 3308; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3309; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18 3310; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3311; AMDGPU-DISABLED-NEXT: entry: 3312; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3313; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3314; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]] 3315; AMDGPU-DISABLED-NEXT: ret void 3316; 3317; 3318; 
AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3319; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper 3320; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3321; AMDGPU-DISABLED-NEXT: entry: 3322; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3323; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3324; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3325; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3326; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3327; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3328; AMDGPU-DISABLED-NEXT: ret void 3329; 3330; 3331; AMDGPU-DISABLED: Function Attrs: noinline nounwind 3332; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized 3333; AMDGPU-DISABLED-SAME: () #[[ATTR6]] { 3334; AMDGPU-DISABLED-NEXT: entry: 3335; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3336; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 3337; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3338; AMDGPU-DISABLED-NEXT: ret void 3339; 3340; 3341; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind 3342; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after 3343; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { 3344; AMDGPU-DISABLED-NEXT: entry: 3345; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3346; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 3347; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr 
@[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3348; AMDGPU-DISABLED-NEXT: ret void 3349; 3350; 3351; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3352; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19 3353; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3354; AMDGPU-DISABLED-NEXT: entry: 3355; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3356; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3357; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]] 3358; AMDGPU-DISABLED-NEXT: ret void 3359; 3360; 3361; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3362; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper 3363; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3364; AMDGPU-DISABLED-NEXT: entry: 3365; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3366; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3367; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3368; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3369; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3370; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3371; AMDGPU-DISABLED-NEXT: ret void 3372; 3373; 3374; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3375; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 3376; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { 3377; NVPTX-DISABLED-NEXT: entry: 3378; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3379; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 3380; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = 
call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]]) 3381; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3382; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3383; NVPTX-DISABLED: user_code.entry: 3384; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] 3385; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3386; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() 3387; NVPTX-DISABLED-NEXT: ret void 3388; NVPTX-DISABLED: worker.exit: 3389; NVPTX-DISABLED-NEXT: ret void 3390; 3391; 3392; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init 3393; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { 3394; NVPTX-DISABLED-NEXT: ret i32 0 3395; 3396; 3397; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3398; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ 3399; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3400; NVPTX-DISABLED-NEXT: entry: 3401; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3402; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3403; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] 3404; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] 3405; NVPTX-DISABLED-NEXT: ret void 3406; 3407; 3408; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind 3409; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized 3410; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { 3411; NVPTX-DISABLED-NEXT: entry: 3412; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 3413; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 
@__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] 3414; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 3415; NVPTX-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] 3416; NVPTX-DISABLED: omp_if.then: 3417; NVPTX-DISABLED-NEXT: store i32 0, ptr @G, align 4 3418; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] 3419; NVPTX-DISABLED-NEXT: br label [[OMP_IF_END]] 3420; NVPTX-DISABLED: omp_if.end: 3421; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] 3422; NVPTX-DISABLED-NEXT: ret void 3423; 3424; 3425; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind 3426; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here 3427; NVPTX-DISABLED-SAME: () #[[ATTR1]] { 3428; NVPTX-DISABLED-NEXT: entry: 3429; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 3430; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) 3431; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 3432; NVPTX-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] 3433; NVPTX-DISABLED: omp_if.then: 3434; NVPTX-DISABLED-NEXT: store i32 0, ptr @G, align 4 3435; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) 3436; NVPTX-DISABLED-NEXT: br label [[OMP_IF_END]] 3437; NVPTX-DISABLED: omp_if.end: 3438; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) 3439; NVPTX-DISABLED-NEXT: ret void 3440; 3441; 3442; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3443; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 3444; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 3445; NVPTX-DISABLED-NEXT: entry: 3446; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3447; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, 
align 4 3448; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]]) 3449; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3450; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3451; NVPTX-DISABLED: user_code.entry: 3452; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 3453; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3454; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() 3455; NVPTX-DISABLED-NEXT: ret void 3456; NVPTX-DISABLED: worker.exit: 3457; NVPTX-DISABLED-NEXT: ret void 3458; 3459; 3460; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3461; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 3462; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3463; NVPTX-DISABLED-NEXT: entry: 3464; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3465; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3466; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 3467; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 3468; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3469; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] 3470; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 3471; NVPTX-DISABLED-NEXT: ret void 3472; 3473; 3474; NVPTX-DISABLED: Function Attrs: convergent noinline 
norecurse nounwind 3475; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 3476; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3477; NVPTX-DISABLED-NEXT: entry: 3478; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3479; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3480; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11:[0-9]+]] 3481; NVPTX-DISABLED-NEXT: ret void 3482; 3483; 3484; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3485; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper 3486; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3487; NVPTX-DISABLED-NEXT: entry: 3488; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3489; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3490; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3491; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3492; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3493; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3494; NVPTX-DISABLED-NEXT: ret void 3495; 3496; 3497; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3498; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 3499; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3500; NVPTX-DISABLED-NEXT: entry: 3501; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3502; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3503; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]] 3504; NVPTX-DISABLED-NEXT: ret void 3505; 3506; 3507; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3508; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 3509; NVPTX-DISABLED-SAME: (i16 zeroext 
[[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3510; NVPTX-DISABLED-NEXT: entry: 3511; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3512; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3513; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3514; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3515; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3516; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3517; NVPTX-DISABLED-NEXT: ret void 3518; 3519; 3520; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3521; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 3522; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 3523; NVPTX-DISABLED-NEXT: entry: 3524; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3525; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 3526; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]]) 3527; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3528; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3529; NVPTX-DISABLED: user_code.entry: 3530; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 3531; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3532; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() 3533; NVPTX-DISABLED-NEXT: ret void 3534; NVPTX-DISABLED: worker.exit: 3535; NVPTX-DISABLED-NEXT: ret void 3536; 3537; 3538; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3539; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 3540; NVPTX-DISABLED-SAME: (ptr 
noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3541; NVPTX-DISABLED-NEXT: entry: 3542; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3543; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3544; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 3545; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] 3546; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] 3547; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3548; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]] 3549; NVPTX-DISABLED-NEXT: ret void 3550; 3551; 3552; NVPTX-DISABLED: Function Attrs: noinline nounwind 3553; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized 3554; NVPTX-DISABLED-SAME: () #[[ATTR6:[0-9]+]] { 3555; NVPTX-DISABLED-NEXT: entry: 3556; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3557; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 3558; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3559; NVPTX-DISABLED-NEXT: ret void 3560; 3561; 3562; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind 3563; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before 3564; NVPTX-DISABLED-SAME: () #[[ATTR1]] { 3565; NVPTX-DISABLED-NEXT: entry: 3566; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3567; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 3568; 
NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3569; NVPTX-DISABLED-NEXT: ret void 3570; 3571; 3572; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3573; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 3574; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3575; NVPTX-DISABLED-NEXT: entry: 3576; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3577; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3578; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]] 3579; NVPTX-DISABLED-NEXT: ret void 3580; 3581; 3582; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3583; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper 3584; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3585; NVPTX-DISABLED-NEXT: entry: 3586; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3587; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3588; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3589; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3590; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3591; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3592; NVPTX-DISABLED-NEXT: ret void 3593; 3594; 3595; NVPTX-DISABLED: Function Attrs: noinline nounwind 3596; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized 3597; NVPTX-DISABLED-SAME: () #[[ATTR6]] { 3598; NVPTX-DISABLED-NEXT: entry: 3599; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3600; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 3601; 
NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3602; NVPTX-DISABLED-NEXT: ret void 3603; 3604; 3605; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind 3606; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after 3607; NVPTX-DISABLED-SAME: () #[[ATTR1]] { 3608; NVPTX-DISABLED-NEXT: entry: 3609; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3610; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 3611; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3612; NVPTX-DISABLED-NEXT: ret void 3613; 3614; 3615; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3616; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 3617; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 3618; NVPTX-DISABLED-NEXT: entry: 3619; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3620; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 3621; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]]) 3622; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3623; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3624; NVPTX-DISABLED: user_code.entry: 3625; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 3626; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3627; NVPTX-DISABLED-NEXT: call void 
@__kmpc_target_deinit() 3628; NVPTX-DISABLED-NEXT: ret void 3629; NVPTX-DISABLED: worker.exit: 3630; NVPTX-DISABLED-NEXT: ret void 3631; 3632; 3633; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3634; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 3635; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3636; NVPTX-DISABLED-NEXT: entry: 3637; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3638; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3639; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 3640; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3641; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] 3642; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 3643; NVPTX-DISABLED-NEXT: ret void 3644; 3645; 3646; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3647; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 3648; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3649; NVPTX-DISABLED-NEXT: entry: 3650; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3651; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3652; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]] 3653; NVPTX-DISABLED-NEXT: ret void 3654; 3655; 3656; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3657; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 3658; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3659; NVPTX-DISABLED-NEXT: 
entry: 3660; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3661; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3662; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3663; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3664; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3665; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3666; NVPTX-DISABLED-NEXT: ret void 3667; 3668; 3669; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3670; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 3671; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3672; NVPTX-DISABLED-NEXT: entry: 3673; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3674; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3675; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]] 3676; NVPTX-DISABLED-NEXT: ret void 3677; 3678; 3679; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3680; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper 3681; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3682; NVPTX-DISABLED-NEXT: entry: 3683; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3684; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3685; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3686; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3687; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3688; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3689; NVPTX-DISABLED-NEXT: ret void 3690; 3691; 3692; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3693; NVPTX-DISABLED-LABEL: define 
{{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 3694; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 3695; NVPTX-DISABLED-NEXT: entry: 3696; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3697; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 3698; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]]) 3699; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3700; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3701; NVPTX-DISABLED: user_code.entry: 3702; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 3703; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3704; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() 3705; NVPTX-DISABLED-NEXT: ret void 3706; NVPTX-DISABLED: worker.exit: 3707; NVPTX-DISABLED-NEXT: ret void 3708; 3709; 3710; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3711; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 3712; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3713; NVPTX-DISABLED-NEXT: entry: 3714; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3715; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3716; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 3717; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3718; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 3719; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 
undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 3720; NVPTX-DISABLED-NEXT: ret void 3721; 3722; 3723; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3724; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10 3725; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3726; NVPTX-DISABLED-NEXT: entry: 3727; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3728; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3729; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]] 3730; NVPTX-DISABLED-NEXT: ret void 3731; 3732; 3733; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3734; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper 3735; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3736; NVPTX-DISABLED-NEXT: entry: 3737; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3738; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3739; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3740; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3741; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3742; NVPTX-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3743; NVPTX-DISABLED-NEXT: ret void 3744; 3745; 3746; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3747; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11 3748; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3749; NVPTX-DISABLED-NEXT: entry: 3750; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3751; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3752; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]] 3753; 
NVPTX-DISABLED-NEXT: ret void 3754; 3755; 3756; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3757; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper 3758; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3759; NVPTX-DISABLED-NEXT: entry: 3760; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3761; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3762; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3763; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3764; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3765; NVPTX-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3766; NVPTX-DISABLED-NEXT: ret void 3767; 3768; 3769; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3770; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 3771; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 3772; NVPTX-DISABLED-NEXT: entry: 3773; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3774; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 3775; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]]) 3776; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3777; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3778; NVPTX-DISABLED: user_code.entry: 3779; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 3780; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3781; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() 3782; NVPTX-DISABLED-NEXT: ret void 3783; NVPTX-DISABLED: worker.exit: 3784; 
NVPTX-DISABLED-NEXT: ret void 3785; 3786; 3787; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3788; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12 3789; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3790; NVPTX-DISABLED-NEXT: entry: 3791; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3792; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 3793; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 3794; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]] 3795; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3796; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) 3797; NVPTX-DISABLED-NEXT: ret void 3798; 3799; 3800; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3801; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13 3802; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3803; NVPTX-DISABLED-NEXT: entry: 3804; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3805; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3806; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]] 3807; NVPTX-DISABLED-NEXT: ret void 3808; 3809; 3810; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3811; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper 3812; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3813; NVPTX-DISABLED-NEXT: entry: 3814; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3815; NVPTX-DISABLED-NEXT: 
[[DOTADDR1:%.*]] = alloca i32, align 4 3816; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3817; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3818; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3819; NVPTX-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3820; NVPTX-DISABLED-NEXT: ret void 3821; 3822; 3823; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3824; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14 3825; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3826; NVPTX-DISABLED-NEXT: entry: 3827; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3828; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3829; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]] 3830; NVPTX-DISABLED-NEXT: ret void 3831; 3832; 3833; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3834; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper 3835; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3836; NVPTX-DISABLED-NEXT: entry: 3837; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3838; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3839; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3840; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3841; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3842; NVPTX-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3843; NVPTX-DISABLED-NEXT: ret void 3844; 3845; 3846; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3847; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 3848; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) 
#[[ATTR0]] { 3849; NVPTX-DISABLED-NEXT: entry: 3850; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3851; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 3852; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]]) 3853; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3854; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3855; NVPTX-DISABLED: user_code.entry: 3856; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 3857; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3858; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() 3859; NVPTX-DISABLED-NEXT: ret void 3860; NVPTX-DISABLED: worker.exit: 3861; NVPTX-DISABLED-NEXT: ret void 3862; 3863; 3864; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3865; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15 3866; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3867; NVPTX-DISABLED-NEXT: entry: 3868; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3869; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3870; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] 3871; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] 3872; NVPTX-DISABLED-NEXT: ret void 3873; 3874; 3875; NVPTX-DISABLED: Function Attrs: noinline nounwind 3876; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized 3877; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { 3878; NVPTX-DISABLED-NEXT: entry: 3879; 
NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 3880; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 3881; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 3882; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 3883; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] 3884; NVPTX-DISABLED: if.then: 3885; NVPTX-DISABLED-NEXT: br label [[RETURN:%.*]] 3886; NVPTX-DISABLED: if.end: 3887; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 3888; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 3889; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]] 3890; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]] 3891; NVPTX-DISABLED-NEXT: br label [[RETURN]] 3892; NVPTX-DISABLED: return: 3893; NVPTX-DISABLED-NEXT: ret void 3894; 3895; 3896; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind 3897; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after 3898; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { 3899; NVPTX-DISABLED-NEXT: entry: 3900; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 3901; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 3902; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 3903; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 3904; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] 3905; NVPTX-DISABLED: if.then: 3906; NVPTX-DISABLED-NEXT: br label [[RETURN:%.*]] 3907; NVPTX-DISABLED: if.end: 3908; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 3909; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 3910; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]] 3911; 
NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]] 3912; NVPTX-DISABLED-NEXT: br label [[RETURN]] 3913; NVPTX-DISABLED: return: 3914; NVPTX-DISABLED-NEXT: ret void 3915; 3916; 3917; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3918; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 3919; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 3920; NVPTX-DISABLED-NEXT: entry: 3921; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3922; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 3923; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]]) 3924; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3925; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3926; NVPTX-DISABLED: user_code.entry: 3927; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] 3928; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3929; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() 3930; NVPTX-DISABLED-NEXT: ret void 3931; NVPTX-DISABLED: worker.exit: 3932; NVPTX-DISABLED-NEXT: ret void 3933; 3934; 3935; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3936; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16 3937; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3938; NVPTX-DISABLED-NEXT: entry: 3939; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3940; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3941; NVPTX-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR9]] 3942; NVPTX-DISABLED-NEXT: ret 
void 3943; 3944; 3945; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind 3946; NVPTX-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty 3947; NVPTX-DISABLED-SAME: () #[[ATTR1]] { 3948; NVPTX-DISABLED-NEXT: entry: 3949; NVPTX-DISABLED-NEXT: ret void 3950; 3951; 3952; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3953; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17 3954; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3955; NVPTX-DISABLED-NEXT: entry: 3956; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3957; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3958; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]] 3959; NVPTX-DISABLED-NEXT: ret void 3960; 3961; 3962; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3963; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper 3964; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3965; NVPTX-DISABLED-NEXT: entry: 3966; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3967; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3968; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3969; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3970; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3971; NVPTX-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3972; NVPTX-DISABLED-NEXT: ret void 3973; 3974; 3975; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3976; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18 3977; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3978; NVPTX-DISABLED-NEXT: entry: 3979; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 3980; NVPTX-DISABLED-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 3981; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]] 3982; NVPTX-DISABLED-NEXT: ret void 3983; 3984; 3985; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 3986; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper 3987; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 3988; NVPTX-DISABLED-NEXT: entry: 3989; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3990; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3991; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3992; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3993; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3994; NVPTX-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 3995; NVPTX-DISABLED-NEXT: ret void 3996; 3997; 3998; NVPTX-DISABLED: Function Attrs: noinline nounwind 3999; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized 4000; NVPTX-DISABLED-SAME: () #[[ATTR6]] { 4001; NVPTX-DISABLED-NEXT: entry: 4002; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 4003; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] 4004; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 4005; NVPTX-DISABLED-NEXT: ret void 4006; 4007; 4008; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind 4009; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after 4010; NVPTX-DISABLED-SAME: () #[[ATTR1]] { 4011; NVPTX-DISABLED-NEXT: entry: 4012; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 4013; NVPTX-DISABLED-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) 4014; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 4015; NVPTX-DISABLED-NEXT: ret void 4016; 4017; 4018; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 4019; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19 4020; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 4021; NVPTX-DISABLED-NEXT: entry: 4022; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 4023; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 4024; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]] 4025; NVPTX-DISABLED-NEXT: ret void 4026; 4027; 4028; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind 4029; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper 4030; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 4031; NVPTX-DISABLED-NEXT: entry: 4032; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 4033; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 4034; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 4035; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 4036; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 4037; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] 4038; NVPTX-DISABLED-NEXT: ret void 4039; 4040;. 
4041; AMDGPU: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4042; AMDGPU: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4043; AMDGPU: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4044; AMDGPU: attributes #[[ATTR3]] = { nounwind } 4045; AMDGPU: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4046; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } 4047; AMDGPU: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4048; AMDGPU: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4049; AMDGPU: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4050; AMDGPU: attributes #[[ATTR9]] = { convergent nounwind } 4051; AMDGPU: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } 4052; AMDGPU: attributes #[[ATTR11]] = { convergent } 4053;. 
4054; NVPTX: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4055; NVPTX: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4056; NVPTX: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4057; NVPTX: attributes #[[ATTR3]] = { nounwind } 4058; NVPTX: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4059; NVPTX: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } 4060; NVPTX: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4061; NVPTX: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4062; NVPTX: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4063; NVPTX: attributes #[[ATTR9]] = { convergent nounwind } 4064; NVPTX: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } 4065; NVPTX: attributes #[[ATTR11]] = { convergent } 4066;. 
4067; AMDGPU-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4068; AMDGPU-DISABLED: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4069; AMDGPU-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4070; AMDGPU-DISABLED: attributes #[[ATTR3]] = { nounwind } 4071; AMDGPU-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4072; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } 4073; AMDGPU-DISABLED: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4074; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4075; AMDGPU-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4076; AMDGPU-DISABLED: attributes #[[ATTR9]] = { convergent nounwind } 4077; AMDGPU-DISABLED: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } 4078; AMDGPU-DISABLED: attributes #[[ATTR11]] = { convergent } 4079;. 
4080; NVPTX-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4081; NVPTX-DISABLED: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4082; NVPTX-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4083; NVPTX-DISABLED: attributes #[[ATTR3]] = { nounwind } 4084; NVPTX-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4085; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } 4086; NVPTX-DISABLED: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4087; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4088; NVPTX-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } 4089; NVPTX-DISABLED: attributes #[[ATTR9]] = { convergent nounwind } 4090; NVPTX-DISABLED: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } 4091; NVPTX-DISABLED: attributes #[[ATTR11]] = { convergent } 4092;. 
4093; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} 4094; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} 4095; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} 4096; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} 4097; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} 4098; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} 4099; AMDGPU: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} 4100; AMDGPU: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} 4101; AMDGPU: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} 4102; AMDGPU: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} 4103; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} 4104;. 
4105; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} 4106; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} 4107; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} 4108; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} 4109; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} 4110; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} 4111; NVPTX: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} 4112; NVPTX: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} 4113; NVPTX: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} 4114; NVPTX: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} 4115; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} 4116;. 
4117; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} 4118; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} 4119; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} 4120; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} 4121; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} 4122; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} 4123; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} 4124; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} 4125; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} 4126; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} 4127; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} 4128;. 
4129; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} 4130; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} 4131; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} 4132; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} 4133; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} 4134; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} 4135; NVPTX-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} 4136; NVPTX-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} 4137; NVPTX-DISABLED: [[META8:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} 4138; NVPTX-DISABLED: [[META9:![0-9]+]] = !{i32 7, !"openmp", i32 50} 4139; NVPTX-DISABLED: [[META10:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} 4140;. 4141