; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED1
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED2
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED1
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX-DISABLED2

;; void unknown(void);
;; [[omp::assume("ompx_spmd_amenable")]] void spmd_amenable(void);
;;
;; void sequential_loop() {
;;   #pragma omp target teams
;;   {
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       {
;;         unknown();
;;       }
;;     }
;;     spmd_amenable();
;;   }
;; }
;;
;; [[omp::assume("ompx_spmd_amenable")]] void use(__attribute__((noescape)) int *);
;;
;; void sequential_loop_to_stack_var() {
;;   #pragma omp target teams
;;   {
;;     int x;
;;     use(&x);
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       {
;;         unknown();
;;       }
;;     }
;;     spmd_amenable();
;;   }
;; }
;;
;; void sequential_loop_to_shared_var() {
;;   #pragma omp target teams
;;   {
;;     int x;
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       {
;;         x++;
;;         unknown();
;;       }
;;     }
;;     spmd_amenable();
;;   }
;; }
;;
;; void sequential_loop_to_shared_var_guarded() {
;;   #pragma omp target teams
;;   {
;;     int x = 42;
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       {
;;         x++;
;;         unknown();
;;       }
;;     }
;;     spmd_amenable();
;;   }
;; }
;;
;; void do_not_spmdize_target() {
;;   #pragma omp target teams
;;   {
;;     // Incompatible parallel level, called both
;;     // from parallel and target regions
;;     unknown();
;;   }
;; }
;;
;; void do_not_spmdize_task() {
;;   #pragma omp target
;;   {
;;     #pragma omp task
;;       spmd_amenable();
;;     #pragma omp parallel
;;       unknown();
;;   }
;; }

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.kmp_task_t_with_privates = type { %struct.kmp_task_t }
%struct.kmp_task_t = type { ptr, ptr, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t }
%union.kmp_cmplrdata_t = type { ptr }
%struct.anon = type {}
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }

@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }

;.
; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; AMDGPU: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
; AMDGPU: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU: @x_shared.1 = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU: @__omp_outlined__9_wrapper.ID = private constant i8 undef
;.
122; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 123; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 124; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 125; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 126; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 127; NVPTX: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 128; NVPTX: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 129; NVPTX: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 130; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 131; NVPTX: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4 132; NVPTX: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4 133; NVPTX: @__omp_outlined__9_wrapper.ID = private constant i8 undef 134;. 
135; AMDGPU-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 136; AMDGPU-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 137; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 138; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 139; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 140; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 141; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 142; AMDGPU-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 143; AMDGPU-DISABLED1: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4 144; AMDGPU-DISABLED1: @x_shared.1 = internal addrspace(3) global [4 x i8] poison, align 4 145; AMDGPU-DISABLED1: @__omp_outlined__1_wrapper.ID = private constant i8 undef 146; AMDGPU-DISABLED1: @__omp_outlined__3_wrapper.ID = private constant i8 undef 147; AMDGPU-DISABLED1: @__omp_outlined__5_wrapper.ID = private constant i8 undef 148; AMDGPU-DISABLED1: @__omp_outlined__7_wrapper.ID = private constant i8 undef 149; AMDGPU-DISABLED1: @__omp_outlined__9_wrapper.ID = private constant i8 undef 150;. 
151; AMDGPU-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 152; AMDGPU-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 153; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 154; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 155; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 156; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 157; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 158; AMDGPU-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 159; AMDGPU-DISABLED2: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4 160; AMDGPU-DISABLED2: @x_shared.1 = internal addrspace(3) global [4 x i8] poison, align 4 161; AMDGPU-DISABLED2: @__omp_outlined__1_wrapper.ID = private constant i8 undef 162; AMDGPU-DISABLED2: @__omp_outlined__3_wrapper.ID = private constant i8 undef 163; AMDGPU-DISABLED2: @__omp_outlined__5_wrapper.ID = private constant i8 undef 164; AMDGPU-DISABLED2: @__omp_outlined__7_wrapper.ID = private constant i8 undef 165; AMDGPU-DISABLED2: @__omp_outlined__9_wrapper.ID = private constant i8 undef 166;. 
167; NVPTX-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 168; NVPTX-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 169; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 170; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 171; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 172; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 173; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 174; NVPTX-DISABLED1: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 175; NVPTX-DISABLED1: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4 176; NVPTX-DISABLED1: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4 177; NVPTX-DISABLED1: @__omp_outlined__1_wrapper.ID = private constant i8 undef 178; NVPTX-DISABLED1: @__omp_outlined__3_wrapper.ID = private constant i8 undef 179; NVPTX-DISABLED1: @__omp_outlined__5_wrapper.ID = private constant i8 undef 180; NVPTX-DISABLED1: @__omp_outlined__7_wrapper.ID = private constant i8 undef 181; NVPTX-DISABLED1: @__omp_outlined__9_wrapper.ID = private constant i8 undef 182;. 
183; NVPTX-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 184; NVPTX-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 185; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 186; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 187; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 188; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 189; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 190; NVPTX-DISABLED2: @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 191; NVPTX-DISABLED2: @x_shared = internal addrspace(3) global [4 x i8] poison, align 4 192; NVPTX-DISABLED2: @x_shared1 = internal addrspace(3) global [4 x i8] poison, align 4 193; NVPTX-DISABLED2: @__omp_outlined__1_wrapper.ID = private constant i8 undef 194; NVPTX-DISABLED2: @__omp_outlined__3_wrapper.ID = private constant i8 undef 195; NVPTX-DISABLED2: @__omp_outlined__5_wrapper.ID = private constant i8 undef 196; NVPTX-DISABLED2: @__omp_outlined__7_wrapper.ID = private constant i8 undef 197; NVPTX-DISABLED2: @__omp_outlined__9_wrapper.ID = private constant i8 undef 198;. 
199define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 { 200; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 201; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] { 202; AMDGPU-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() 203; AMDGPU-NEXT: ret void 204; 205; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 206; NVPTX-SAME: () #[[ATTR0:[0-9]+]] { 207; NVPTX-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() 208; NVPTX-NEXT: ret void 209; 210; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 211; AMDGPU-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] { 212; AMDGPU-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() 213; AMDGPU-DISABLED1-NEXT: ret void 214; 215; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 216; AMDGPU-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] { 217; AMDGPU-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() 218; AMDGPU-DISABLED2-NEXT: ret void 219; 220; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 221; NVPTX-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] { 222; NVPTX-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() 223; NVPTX-DISABLED1-NEXT: ret void 224; 225; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 226; NVPTX-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] { 227; NVPTX-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() 228; NVPTX-DISABLED2-NEXT: ret void 229 call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() 230 ret void 231} 232 233define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() { 234; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug 235; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { 236; AMDGPU-NEXT: entry: 237; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 238; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 239; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) 240; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 241; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 242; AMDGPU: common.ret: 243; AMDGPU-NEXT: ret void 244; AMDGPU: user_code.entry: 245; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] 246; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] 247; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 248; AMDGPU-NEXT: call void @__kmpc_target_deinit() 249; AMDGPU-NEXT: br label [[COMMON_RET]] 250; 251; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug 252; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { 253; NVPTX-NEXT: entry: 254; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 255; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 256; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) 257; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 258; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], 
label [[COMMON_RET:%.*]] 259; NVPTX: common.ret: 260; NVPTX-NEXT: ret void 261; NVPTX: user_code.entry: 262; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] 263; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] 264; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 265; NVPTX-NEXT: call void @__kmpc_target_deinit() 266; NVPTX-NEXT: br label [[COMMON_RET]] 267; 268; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug 269; AMDGPU-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] { 270; AMDGPU-DISABLED1-NEXT: entry: 271; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 272; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 273; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 274; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) 275; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 276; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 277; AMDGPU-DISABLED1: is_worker_check: 278; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 279; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 280; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 281; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 282; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 283; AMDGPU-DISABLED1: worker_state_machine.begin: 284; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 285; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 286; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 287; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 288; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 289; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 290; AMDGPU-DISABLED1: worker_state_machine.finished: 291; AMDGPU-DISABLED1-NEXT: ret void 292; AMDGPU-DISABLED1: worker_state_machine.is_active.check: 293; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 294; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: 295; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 296; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: 297; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) 298; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 299; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1: 300; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 301; 
AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: 302; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() 303; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 304; AMDGPU-DISABLED1: worker_state_machine.done.barrier: 305; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 306; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 307; AMDGPU-DISABLED1: thread.user_code.check: 308; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 309; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 310; AMDGPU-DISABLED1: common.ret: 311; AMDGPU-DISABLED1-NEXT: ret void 312; AMDGPU-DISABLED1: user_code.entry: 313; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] 314; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] 315; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 316; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() 317; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] 318; 319; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug 320; AMDGPU-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] { 321; AMDGPU-DISABLED2-NEXT: entry: 322; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 323; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 324; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 325; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) 326; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 327; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 328; AMDGPU-DISABLED2: is_worker_check: 329; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 330; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 331; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 332; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 333; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 334; AMDGPU-DISABLED2: worker_state_machine.begin: 335; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 336; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 337; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 338; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 339; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 340; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 341; AMDGPU-DISABLED2: worker_state_machine.finished: 342; AMDGPU-DISABLED2-NEXT: ret void 343; AMDGPU-DISABLED2: worker_state_machine.is_active.check: 344; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 345; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check: 346; AMDGPU-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 347; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute: 348; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) 349; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 350; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check1: 351; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 352; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end: 353; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() 354; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 355; AMDGPU-DISABLED2: worker_state_machine.done.barrier: 356; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 357; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 358; AMDGPU-DISABLED2: thread.user_code.check: 359; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 360; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 361; AMDGPU-DISABLED2: common.ret: 362; AMDGPU-DISABLED2-NEXT: ret void 363; AMDGPU-DISABLED2: user_code.entry: 364; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] 365; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] 366; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 367; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() 368; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] 369; 370; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug 371; NVPTX-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] { 372; NVPTX-DISABLED1-NEXT: entry: 373; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 374; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 375; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 376; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) 377; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 378; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 379; NVPTX-DISABLED1: is_worker_check: 380; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 381; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 382; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 383; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 384; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 385; NVPTX-DISABLED1: worker_state_machine.begin: 386; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 387; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 388; 
NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 389; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 390; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 391; NVPTX-DISABLED1: worker_state_machine.finished: 392; NVPTX-DISABLED1-NEXT: ret void 393; NVPTX-DISABLED1: worker_state_machine.is_active.check: 394; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 395; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: 396; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 397; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: 398; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) 399; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 400; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1: 401; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 402; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: 403; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() 404; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 405; NVPTX-DISABLED1: worker_state_machine.done.barrier: 406; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 407; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 408; NVPTX-DISABLED1: thread.user_code.check: 409; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 410; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 411; NVPTX-DISABLED1: common.ret: 412; NVPTX-DISABLED1-NEXT: ret void 413; NVPTX-DISABLED1: user_code.entry: 414; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] 415; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] 416; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 417; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() 418; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] 419; 420; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug 421; NVPTX-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] { 422; NVPTX-DISABLED2-NEXT: entry: 423; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 424; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 425; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 426; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) 427; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 428; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 429; NVPTX-DISABLED2: is_worker_check: 430; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 431; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 432; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 433; 
NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 434; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 435; NVPTX-DISABLED2: worker_state_machine.begin: 436; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 437; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 438; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 439; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 440; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 441; NVPTX-DISABLED2: worker_state_machine.finished: 442; NVPTX-DISABLED2-NEXT: ret void 443; NVPTX-DISABLED2: worker_state_machine.is_active.check: 444; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 445; NVPTX-DISABLED2: worker_state_machine.parallel_region.check: 446; NVPTX-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 447; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute: 448; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) 449; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 450; NVPTX-DISABLED2: worker_state_machine.parallel_region.check1: 451; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 452; NVPTX-DISABLED2: worker_state_machine.parallel_region.end: 453; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() 454; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 455; NVPTX-DISABLED2: worker_state_machine.done.barrier: 456; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 457; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 458; NVPTX-DISABLED2: thread.user_code.check: 459; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 460; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 461; NVPTX-DISABLED2: common.ret: 462; NVPTX-DISABLED2-NEXT: ret void 463; NVPTX-DISABLED2: user_code.entry: 464; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] 465; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]] 466; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 467; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() 468; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] 469entry: 470 %.zero.addr = alloca i32, align 4 471 %.threadid_temp. 
= alloca i32, align 4 472 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) 473 %exec_user_code = icmp eq i32 %0, -1 474 br i1 %exec_user_code, label %user_code.entry, label %common.ret 475 476common.ret: ; preds = %entry, %user_code.entry 477 ret void 478 479user_code.entry: ; preds = %entry 480 %1 = call i32 @__kmpc_global_thread_num(ptr @1) 481 store i32 0, ptr %.zero.addr, align 4 482 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 483 call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #6 484 call void @__kmpc_target_deinit() 485 br label %common.ret 486} 487 488; Function Attrs: alwaysinline convergent norecurse nounwind 489define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { 490; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ 491; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 492; AMDGPU-NEXT: entry: 493; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 494; AMDGPU-NEXT: br label [[FOR_COND:%.*]] 495; AMDGPU: for.cond: 496; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 497; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 498; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 499; AMDGPU: for.cond.cleanup: 500; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] 501; AMDGPU-NEXT: ret void 502; AMDGPU: for.body: 503; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 504; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 505; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 506; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] 507; 508; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ 509; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 510; NVPTX-NEXT: entry: 511; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 512; NVPTX-NEXT: br label [[FOR_COND:%.*]] 513; NVPTX: for.cond: 514; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 515; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 516; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 517; NVPTX: for.cond.cleanup: 518; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] 519; NVPTX-NEXT: ret void 520; NVPTX: for.body: 521; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 522; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 523; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 524; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] 525; 526; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__ 527; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 528; AMDGPU-DISABLED1-NEXT: entry: 529; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 530; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] 531; AMDGPU-DISABLED1: for.cond: 532; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 533; 
AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 534; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 535; AMDGPU-DISABLED1: for.cond.cleanup: 536; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] 537; AMDGPU-DISABLED1-NEXT: ret void 538; AMDGPU-DISABLED1: for.body: 539; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 540; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 541; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 542; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] 543; 544; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__ 545; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 546; AMDGPU-DISABLED2-NEXT: entry: 547; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 548; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] 549; AMDGPU-DISABLED2: for.cond: 550; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 551; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 552; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 553; AMDGPU-DISABLED2: for.cond.cleanup: 554; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] 555; AMDGPU-DISABLED2-NEXT: ret void 556; AMDGPU-DISABLED2: for.body: 557; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 558; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 559; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 560; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] 561; 562; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__ 563; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 564; NVPTX-DISABLED1-NEXT: entry: 565; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 566; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]] 567; NVPTX-DISABLED1: for.cond: 568; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 569; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 570; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 571; NVPTX-DISABLED1: for.cond.cleanup: 572; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] 573; NVPTX-DISABLED1-NEXT: ret void 574; NVPTX-DISABLED1: for.body: 575; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 576; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 577; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 578; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] 579; 580; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__ 581; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 582; 
NVPTX-DISABLED2-NEXT: entry: 583; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 584; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]] 585; NVPTX-DISABLED2: for.cond: 586; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 587; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 588; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 589; NVPTX-DISABLED2: for.cond.cleanup: 590; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] 591; NVPTX-DISABLED2-NEXT: ret void 592; NVPTX-DISABLED2: for.body: 593; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 594; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 595; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 596; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] 597entry: 598 %captured_vars_addrs = alloca [0 x ptr], align 8 599 br label %for.cond 600 601for.cond: ; preds = %for.body, %entry 602 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 603 %cmp = icmp slt i32 %i.0, 100 604 br i1 %cmp, label %for.body, label %for.cond.cleanup 605 606for.cond.cleanup: ; preds = %for.cond 607 call void @spmd_amenable() #10 608 ret void 609 610for.body: ; preds = %for.cond 611 %0 = load i32, ptr %.global_tid., align 4, !tbaa !18 612 call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs, i64 0) 613 %inc = add nsw i32 %i.0, 1 614 br label %for.cond, !llvm.loop !22 615} 616 617; Function Attrs: alwaysinline convergent norecurse nounwind 618define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
{ 619; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 620; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 621; AMDGPU-NEXT: entry: 622; AMDGPU-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] 623; AMDGPU-NEXT: ret void 624; 625; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 626; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 627; NVPTX-NEXT: entry: 628; NVPTX-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] 629; NVPTX-NEXT: ret void 630; 631; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1 632; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 633; AMDGPU-DISABLED1-NEXT: entry: 634; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] 635; AMDGPU-DISABLED1-NEXT: ret void 636; 637; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1 638; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 639; AMDGPU-DISABLED2-NEXT: entry: 640; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] 641; AMDGPU-DISABLED2-NEXT: ret void 642; 643; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1 644; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 645; NVPTX-DISABLED1-NEXT: entry: 646; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] 647; NVPTX-DISABLED1-NEXT: ret void 648; 649; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1 650; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 651; NVPTX-DISABLED2-NEXT: entry: 652; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] 653; NVPTX-DISABLED2-NEXT: ret void 654entry: 655 call void @unknown() #11 656 ret void 657} 658 659; Function Attrs: convergent norecurse nounwind 660define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { 661; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 662; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { 663; AMDGPU-NEXT: entry: 664; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 665; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 666; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 667; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 668; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 669; AMDGPU-NEXT: ret void 670; 671; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 672; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { 673; NVPTX-NEXT: entry: 674; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 675; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 676; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 677; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 678; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 679; NVPTX-NEXT: ret void 680; 681; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 682; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { 683; AMDGPU-DISABLED1-NEXT: entry: 684; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 685; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 686; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 687; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 688; 
AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 689; AMDGPU-DISABLED1-NEXT: ret void 690; 691; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 692; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { 693; AMDGPU-DISABLED2-NEXT: entry: 694; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 695; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 696; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 697; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 698; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 699; AMDGPU-DISABLED2-NEXT: ret void 700; 701; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 702; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { 703; NVPTX-DISABLED1-NEXT: entry: 704; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 705; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 706; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 707; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 708; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 709; NVPTX-DISABLED1-NEXT: ret void 710; 711; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 712; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { 713; NVPTX-DISABLED2-NEXT: entry: 714; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 715; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 716; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 717; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 718; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 719; NVPTX-DISABLED2-NEXT: ret void 720entry: 721 %.addr1 = alloca i32, align 4 722 %.zero.addr = alloca i32, align 4 723 %global_args = alloca ptr, align 8 724 store i32 %1, ptr %.addr1, align 4, !tbaa !18 725 store i32 0, ptr %.zero.addr, align 4 726 call void @__kmpc_get_shared_variables(ptr %global_args) 727 call void @__omp_outlined__1(ptr %.addr1, ptr %.zero.addr) #6 728 ret void 729} 730 731; Function Attrs: alwaysinline convergent norecurse nounwind 732define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 { 733; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 734; AMDGPU-SAME: () #[[ATTR0]] { 735; AMDGPU-NEXT: entry: 736; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 737; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 738; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) 739; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 740; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 741; AMDGPU: common.ret: 742; AMDGPU-NEXT: ret void 743; AMDGPU: user_code.entry: 744; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 745; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 746; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr 
[[DOTZERO_ADDR]]) #[[ATTR4]] 747; AMDGPU-NEXT: call void @__kmpc_target_deinit() 748; AMDGPU-NEXT: br label [[COMMON_RET]] 749; 750; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 751; NVPTX-SAME: () #[[ATTR0]] { 752; NVPTX-NEXT: entry: 753; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 754; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 755; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) 756; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 757; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 758; NVPTX: common.ret: 759; NVPTX-NEXT: ret void 760; NVPTX: user_code.entry: 761; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 762; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 763; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 764; NVPTX-NEXT: call void @__kmpc_target_deinit() 765; NVPTX-NEXT: br label [[COMMON_RET]] 766; 767; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 768; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { 769; AMDGPU-DISABLED1-NEXT: entry: 770; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 771; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 772; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 773; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) 774; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 775; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 776; AMDGPU-DISABLED1: is_worker_check: 777; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 778; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 779; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 780; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 781; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 782; AMDGPU-DISABLED1: worker_state_machine.begin: 783; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 784; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 785; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 786; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 787; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 788; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 789; AMDGPU-DISABLED1: worker_state_machine.finished: 790; AMDGPU-DISABLED1-NEXT: ret void 791; AMDGPU-DISABLED1: worker_state_machine.is_active.check: 792; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 793; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: 794; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 795; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: 796; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) 797; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 798; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1: 799; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 800; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: 801; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() 802; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 803; AMDGPU-DISABLED1: worker_state_machine.done.barrier: 804; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 805; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 806; AMDGPU-DISABLED1: thread.user_code.check: 807; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 808; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 809; AMDGPU-DISABLED1: common.ret: 810; AMDGPU-DISABLED1-NEXT: ret void 811; AMDGPU-DISABLED1: user_code.entry: 812; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 813; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 814; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 815; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() 816; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] 817; 818; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 819; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { 820; AMDGPU-DISABLED2-NEXT: entry: 821; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 822; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 823; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 824; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) 825; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 826; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 827; AMDGPU-DISABLED2: is_worker_check: 828; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 829; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 830; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 831; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 832; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 833; AMDGPU-DISABLED2: worker_state_machine.begin: 834; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 835; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) 
[[WORKER_WORK_FN_ADDR]] to ptr 836; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 837; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 838; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 839; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 840; AMDGPU-DISABLED2: worker_state_machine.finished: 841; AMDGPU-DISABLED2-NEXT: ret void 842; AMDGPU-DISABLED2: worker_state_machine.is_active.check: 843; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 844; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check: 845; AMDGPU-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 846; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute: 847; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) 848; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 849; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check1: 850; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 851; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end: 852; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() 853; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 854; AMDGPU-DISABLED2: worker_state_machine.done.barrier: 855; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 856; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 857; AMDGPU-DISABLED2: thread.user_code.check: 858; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 859; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 860; AMDGPU-DISABLED2: common.ret: 861; AMDGPU-DISABLED2-NEXT: ret void 862; AMDGPU-DISABLED2: user_code.entry: 863; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 864; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 865; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 866; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() 867; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] 868; 869; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 870; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { 871; NVPTX-DISABLED1-NEXT: entry: 872; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 873; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 874; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 875; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) 876; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 877; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 878; NVPTX-DISABLED1: is_worker_check: 879; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 
@__kmpc_get_hardware_num_threads_in_block() 880; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 881; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 882; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 883; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 884; NVPTX-DISABLED1: worker_state_machine.begin: 885; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 886; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 887; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 888; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 889; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 890; NVPTX-DISABLED1: worker_state_machine.finished: 891; NVPTX-DISABLED1-NEXT: ret void 892; NVPTX-DISABLED1: worker_state_machine.is_active.check: 893; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 894; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: 895; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 896; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: 897; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) 898; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 899; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1: 900; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 901; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: 902; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() 903; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 904; NVPTX-DISABLED1: worker_state_machine.done.barrier: 905; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 906; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 907; NVPTX-DISABLED1: thread.user_code.check: 908; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 909; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 910; NVPTX-DISABLED1: common.ret: 911; NVPTX-DISABLED1-NEXT: ret void 912; NVPTX-DISABLED1: user_code.entry: 913; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 914; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 915; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 916; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() 917; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] 918; 919; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 920; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { 921; NVPTX-DISABLED2-NEXT: entry: 922; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 923; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 924; NVPTX-DISABLED2-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 925; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) 926; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 927; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 928; NVPTX-DISABLED2: is_worker_check: 929; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 930; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 931; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 932; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 933; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 934; NVPTX-DISABLED2: worker_state_machine.begin: 935; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 936; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 937; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 938; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 939; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 940; NVPTX-DISABLED2: worker_state_machine.finished: 941; NVPTX-DISABLED2-NEXT: ret void 942; NVPTX-DISABLED2: worker_state_machine.is_active.check: 943; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 944; NVPTX-DISABLED2: worker_state_machine.parallel_region.check: 945; NVPTX-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 946; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute: 947; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) 948; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 949; NVPTX-DISABLED2: worker_state_machine.parallel_region.check1: 950; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 951; NVPTX-DISABLED2: worker_state_machine.parallel_region.end: 952; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() 953; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 954; NVPTX-DISABLED2: worker_state_machine.done.barrier: 955; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 956; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 957; NVPTX-DISABLED2: thread.user_code.check: 958; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 959; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 960; NVPTX-DISABLED2: common.ret: 961; NVPTX-DISABLED2-NEXT: ret void 962; NVPTX-DISABLED2: user_code.entry: 963; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 964; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 965; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr 
[[DOTZERO_ADDR]]) #[[ATTR4]] 966; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() 967; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] 968entry: 969 %.zero.addr = alloca i32, align 4 970 %.threadid_temp. = alloca i32, align 4 971 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) 972 %exec_user_code = icmp eq i32 %0, -1 973 br i1 %exec_user_code, label %user_code.entry, label %common.ret 974 975common.ret: ; preds = %entry, %user_code.entry 976 ret void 977 978user_code.entry: ; preds = %entry 979 %1 = call i32 @__kmpc_global_thread_num(ptr @1) 980 store i32 0, ptr %.zero.addr, align 4 981 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 982 call void @__omp_outlined__2(ptr %.threadid_temp., ptr %.zero.addr) #6 983 call void @__kmpc_target_deinit() 984 br label %common.ret 985} 986 987; Function Attrs: alwaysinline convergent norecurse nounwind 988define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { 989; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 990; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 991; AMDGPU-NEXT: entry: 992; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) 993; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 994; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr 995; AMDGPU-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]] 996; AMDGPU-NEXT: br label [[FOR_COND:%.*]] 997; AMDGPU: for.cond: 998; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 999; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1000; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1001; AMDGPU: for.cond.cleanup: 1002; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] 1003; AMDGPU-NEXT: ret void 1004; AMDGPU: for.body: 1005; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 1006; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1007; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1008; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] 1009; 1010; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 1011; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 1012; NVPTX-NEXT: entry: 1013; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 1014; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1015; NVPTX-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]] 1016; NVPTX-NEXT: br label [[FOR_COND:%.*]] 1017; NVPTX: for.cond: 1018; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1019; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1020; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1021; NVPTX: for.cond.cleanup: 1022; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] 1023; NVPTX-NEXT: ret void 1024; NVPTX: for.body: 1025; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 1026; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1027; NVPTX-NEXT: 
[[INC]] = add nsw i32 [[I_0]], 1 1028; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] 1029; 1030; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2 1031; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 1032; AMDGPU-DISABLED1-NEXT: entry: 1033; AMDGPU-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) 1034; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1035; AMDGPU-DISABLED1-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr 1036; AMDGPU-DISABLED1-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]] 1037; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] 1038; AMDGPU-DISABLED1: for.cond: 1039; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1040; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1041; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1042; AMDGPU-DISABLED1: for.cond.cleanup: 1043; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] 1044; AMDGPU-DISABLED1-NEXT: ret void 1045; AMDGPU-DISABLED1: for.body: 1046; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 1047; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1048; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1049; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] 1050; 1051; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2 1052; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 1053; AMDGPU-DISABLED2-NEXT: entry: 1054; AMDGPU-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) 1055; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1056; AMDGPU-DISABLED2-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr 1057; AMDGPU-DISABLED2-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]] 1058; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] 1059; AMDGPU-DISABLED2: for.cond: 1060; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1061; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1062; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1063; AMDGPU-DISABLED2: for.cond.cleanup: 1064; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] 1065; AMDGPU-DISABLED2-NEXT: ret void 1066; AMDGPU-DISABLED2: for.body: 1067; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 1068; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1069; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1070; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] 1071; 1072; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2 1073; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 1074; NVPTX-DISABLED1-NEXT: entry: 1075; NVPTX-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 1076; 
NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1077; NVPTX-DISABLED1-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]] 1078; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]] 1079; NVPTX-DISABLED1: for.cond: 1080; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1081; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1082; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1083; NVPTX-DISABLED1: for.cond.cleanup: 1084; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] 1085; NVPTX-DISABLED1-NEXT: ret void 1086; NVPTX-DISABLED1: for.body: 1087; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 1088; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1089; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1090; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] 1091; 1092; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2 1093; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 1094; NVPTX-DISABLED2-NEXT: entry: 1095; NVPTX-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 1096; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 1097; NVPTX-DISABLED2-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]] 1098; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]] 1099; NVPTX-DISABLED2: for.cond: 1100; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1101; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1102; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1103; NVPTX-DISABLED2: for.cond.cleanup: 1104; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] 1105; NVPTX-DISABLED2-NEXT: ret void 1106; NVPTX-DISABLED2: for.body: 1107; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 1108; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 1109; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1110; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] 1111entry: 1112 %captured_vars_addrs = alloca [0 x ptr], align 8 1113 %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) 1114 call void @use(ptr nocapture %x) #10 1115 br label %for.cond 1116 1117for.cond: ; preds = %for.body, %entry 1118 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 1119 %cmp = icmp slt i32 %i.0, 100 1120 br i1 %cmp, label %for.body, label %for.cond.cleanup 1121 1122for.cond.cleanup: ; preds = %for.cond 1123 call void @spmd_amenable() #10 1124 call void @__kmpc_free_shared(ptr %x, i64 4) 1125 ret void 1126 1127for.body: ; preds = %for.cond 1128 %0 = load i32, ptr %.global_tid., align 4, !tbaa !18 1129 call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs, i64 0) 1130 %inc = add nsw i32 %i.0, 1 1131 br label %for.cond, !llvm.loop !25 1132} 1133; Function Attrs: alwaysinline convergent norecurse nounwind 1134define internal void 
@__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { 1135; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 1136; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 1137; AMDGPU-NEXT: entry: 1138; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] 1139; AMDGPU-NEXT: ret void 1140; 1141; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 1142; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 1143; NVPTX-NEXT: entry: 1144; NVPTX-NEXT: call void @unknown() #[[ATTR8]] 1145; NVPTX-NEXT: ret void 1146; 1147; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3 1148; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 1149; AMDGPU-DISABLED1-NEXT: entry: 1150; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] 1151; AMDGPU-DISABLED1-NEXT: ret void 1152; 1153; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3 1154; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 1155; AMDGPU-DISABLED2-NEXT: entry: 1156; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] 1157; AMDGPU-DISABLED2-NEXT: ret void 1158; 1159; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3 1160; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 1161; NVPTX-DISABLED1-NEXT: entry: 1162; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] 1163; NVPTX-DISABLED1-NEXT: ret void 1164; 1165; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3 1166; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 1167; NVPTX-DISABLED2-NEXT: entry: 1168; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] 1169; NVPTX-DISABLED2-NEXT: ret void 1170entry: 1171 call void @unknown() #11 1172 ret void 1173} 1174 1175; Function Attrs: convergent norecurse nounwind 1176define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { 1177; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 1178; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 1179; AMDGPU-NEXT: entry: 1180; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1181; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1182; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1183; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1184; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1185; AMDGPU-NEXT: ret void 1186; 1187; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 1188; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 1189; NVPTX-NEXT: entry: 1190; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1191; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1192; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1193; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1194; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1195; NVPTX-NEXT: ret void 1196; 1197; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 1198; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 1199; AMDGPU-DISABLED1-NEXT: entry: 1200; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1201; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1202; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1203; AMDGPU-DISABLED1-NEXT: 
call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1204; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1205; AMDGPU-DISABLED1-NEXT: ret void 1206; 1207; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 1208; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 1209; AMDGPU-DISABLED2-NEXT: entry: 1210; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1211; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1212; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1213; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1214; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1215; AMDGPU-DISABLED2-NEXT: ret void 1216; 1217; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 1218; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 1219; NVPTX-DISABLED1-NEXT: entry: 1220; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1221; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1222; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1223; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1224; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1225; NVPTX-DISABLED1-NEXT: ret void 1226; 1227; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 1228; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 1229; NVPTX-DISABLED2-NEXT: entry: 1230; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1231; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1232; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 1233; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 1234; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1235; NVPTX-DISABLED2-NEXT: ret void 1236entry: 1237 %.addr1 = alloca i32, align 4 1238 %.zero.addr = alloca i32, align 4 1239 %global_args = alloca ptr, align 8 1240 store i32 %1, ptr %.addr1, align 4, !tbaa !18 1241 store i32 0, ptr %.zero.addr, align 4 1242 call void @__kmpc_get_shared_variables(ptr %global_args) 1243 call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #6 1244 ret void 1245} 1246 1247 1248; Function Attrs: alwaysinline convergent norecurse nounwind 1249define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 { 1250; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 1251; AMDGPU-SAME: () #[[ATTR0]] { 1252; AMDGPU-NEXT: entry: 1253; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1254; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1255; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) 1256; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1257; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1258; AMDGPU: common.ret: 1259; AMDGPU-NEXT: ret void 1260; AMDGPU: user_code.entry: 1261; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 1262; AMDGPU-NEXT: store i32 [[TMP1]], ptr 
[[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 1263; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1264; AMDGPU-NEXT: call void @__kmpc_target_deinit() 1265; AMDGPU-NEXT: br label [[COMMON_RET]] 1266; 1267; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 1268; NVPTX-SAME: () #[[ATTR0]] { 1269; NVPTX-NEXT: entry: 1270; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1271; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1272; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) 1273; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1274; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1275; NVPTX: common.ret: 1276; NVPTX-NEXT: ret void 1277; NVPTX: user_code.entry: 1278; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 1279; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 1280; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1281; NVPTX-NEXT: call void @__kmpc_target_deinit() 1282; NVPTX-NEXT: br label [[COMMON_RET]] 1283; 1284; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 1285; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { 1286; AMDGPU-DISABLED1-NEXT: entry: 1287; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1288; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1289; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1290; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) 1291; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1292; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1293; AMDGPU-DISABLED1: is_worker_check: 1294; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1295; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1296; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1297; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1298; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1299; AMDGPU-DISABLED1: worker_state_machine.begin: 1300; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1301; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 1302; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 1303; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1304; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1305; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1306; AMDGPU-DISABLED1: worker_state_machine.finished: 1307; 
AMDGPU-DISABLED1-NEXT: ret void 1308; AMDGPU-DISABLED1: worker_state_machine.is_active.check: 1309; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1310; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: 1311; AMDGPU-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1312; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: 1313; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) 1314; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1315; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1: 1316; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1317; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: 1318; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() 1319; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1320; AMDGPU-DISABLED1: worker_state_machine.done.barrier: 1321; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1322; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1323; AMDGPU-DISABLED1: thread.user_code.check: 1324; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1325; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1326; AMDGPU-DISABLED1: common.ret: 1327; AMDGPU-DISABLED1-NEXT: ret void 1328; AMDGPU-DISABLED1: user_code.entry: 1329; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 1330; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 1331; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1332; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() 1333; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] 1334; 1335; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 1336; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { 1337; AMDGPU-DISABLED2-NEXT: entry: 1338; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1339; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1340; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1341; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) 1342; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1343; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1344; AMDGPU-DISABLED2: is_worker_check: 1345; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1346; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1347; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1348; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1349; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1350; AMDGPU-DISABLED2: 
worker_state_machine.begin: 1351; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1352; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 1353; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 1354; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1355; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1356; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1357; AMDGPU-DISABLED2: worker_state_machine.finished: 1358; AMDGPU-DISABLED2-NEXT: ret void 1359; AMDGPU-DISABLED2: worker_state_machine.is_active.check: 1360; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1361; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check: 1362; AMDGPU-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1363; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute: 1364; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) 1365; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1366; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check1: 1367; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1368; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end: 1369; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() 1370; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1371; AMDGPU-DISABLED2: worker_state_machine.done.barrier: 1372; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1373; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1374; AMDGPU-DISABLED2: thread.user_code.check: 1375; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1376; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1377; AMDGPU-DISABLED2: common.ret: 1378; AMDGPU-DISABLED2-NEXT: ret void 1379; AMDGPU-DISABLED2: user_code.entry: 1380; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 1381; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 1382; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1383; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() 1384; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] 1385; 1386; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 1387; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { 1388; NVPTX-DISABLED1-NEXT: entry: 1389; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 1390; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1391; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1392; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) 1393; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = 
icmp ne i32 [[TMP0]], -1 1394; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1395; NVPTX-DISABLED1: is_worker_check: 1396; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1397; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1398; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1399; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1400; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1401; NVPTX-DISABLED1: worker_state_machine.begin: 1402; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1403; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 1404; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 1405; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1406; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1407; NVPTX-DISABLED1: worker_state_machine.finished: 1408; NVPTX-DISABLED1-NEXT: ret void 1409; NVPTX-DISABLED1: worker_state_machine.is_active.check: 1410; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1411; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: 1412; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1413; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: 1414; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) 1415; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1416; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1: 1417; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1418; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: 1419; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() 1420; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1421; NVPTX-DISABLED1: worker_state_machine.done.barrier: 1422; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1423; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1424; NVPTX-DISABLED1: thread.user_code.check: 1425; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1426; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1427; NVPTX-DISABLED1: common.ret: 1428; NVPTX-DISABLED1-NEXT: ret void 1429; NVPTX-DISABLED1: user_code.entry: 1430; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 1431; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 1432; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1433; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() 1434; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] 1435; 1436; NVPTX-DISABLED2-LABEL: define 
{{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 1437; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { 1438; NVPTX-DISABLED2-NEXT: entry: 1439; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 1440; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1441; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1442; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) 1443; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1444; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1445; NVPTX-DISABLED2: is_worker_check: 1446; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1447; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1448; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1449; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1450; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1451; NVPTX-DISABLED2: worker_state_machine.begin: 1452; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1453; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 1454; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 1455; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1456; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1457; NVPTX-DISABLED2: worker_state_machine.finished: 1458; NVPTX-DISABLED2-NEXT: ret void 1459; NVPTX-DISABLED2: worker_state_machine.is_active.check: 1460; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1461; NVPTX-DISABLED2: worker_state_machine.parallel_region.check: 1462; NVPTX-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1463; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute: 1464; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) 1465; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1466; NVPTX-DISABLED2: worker_state_machine.parallel_region.check1: 1467; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1468; NVPTX-DISABLED2: worker_state_machine.parallel_region.end: 1469; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() 1470; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1471; NVPTX-DISABLED2: worker_state_machine.done.barrier: 1472; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1473; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1474; NVPTX-DISABLED2: thread.user_code.check: 1475; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1476; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1477; NVPTX-DISABLED2: 
common.ret: 1478; NVPTX-DISABLED2-NEXT: ret void 1479; NVPTX-DISABLED2: user_code.entry: 1480; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 1481; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 1482; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1483; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() 1484; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] 1485entry: 1486 %.zero.addr = alloca i32, align 4 1487 %.threadid_temp. = alloca i32, align 4 1488 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) 1489 %exec_user_code = icmp eq i32 %0, -1 1490 br i1 %exec_user_code, label %user_code.entry, label %common.ret 1491 1492common.ret: ; preds = %entry, %user_code.entry 1493 ret void 1494 1495user_code.entry: ; preds = %entry 1496 %1 = call i32 @__kmpc_global_thread_num(ptr @1) 1497 store i32 0, ptr %.zero.addr, align 4 1498 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 1499 call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #6 1500 call void @__kmpc_target_deinit() 1501 br label %common.ret 1502} 1503 1504; Function Attrs: alwaysinline convergent norecurse nounwind 1505define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { 1506; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 1507; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 1508; AMDGPU-NEXT: entry: 1509; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 1510; AMDGPU-NEXT: br label [[FOR_COND:%.*]] 1511; AMDGPU: for.cond: 1512; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1513; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1514; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1515; AMDGPU: for.cond.cleanup: 1516; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] 1517; AMDGPU-NEXT: ret void 1518; AMDGPU: for.body: 1519; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] 1520; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 1521; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) 1522; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1523; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 1524; 1525; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 1526; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 1527; NVPTX-NEXT: entry: 1528; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 1529; NVPTX-NEXT: br label [[FOR_COND:%.*]] 1530; NVPTX: for.cond: 1531; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1532; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1533; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1534; NVPTX: for.cond.cleanup: 1535; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] 1536; NVPTX-NEXT: ret void 1537; NVPTX: for.body: 1538; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa 
[[TBAA20:![0-9]+]] 1539; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 1540; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) 1541; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1542; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 1543; 1544; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4 1545; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 1546; AMDGPU-DISABLED1-NEXT: entry: 1547; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 1548; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] 1549; AMDGPU-DISABLED1: for.cond: 1550; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1551; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1552; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1553; AMDGPU-DISABLED1: for.cond.cleanup: 1554; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] 1555; AMDGPU-DISABLED1-NEXT: ret void 1556; AMDGPU-DISABLED1: for.body: 1557; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] 1558; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 1559; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) 1560; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1561; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 1562; 1563; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4 1564; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 1565; AMDGPU-DISABLED2-NEXT: entry: 1566; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 1567; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] 1568; AMDGPU-DISABLED2: for.cond: 1569; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1570; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1571; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1572; AMDGPU-DISABLED2: for.cond.cleanup: 1573; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] 1574; AMDGPU-DISABLED2-NEXT: ret void 1575; AMDGPU-DISABLED2: for.body: 1576; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]] 1577; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 1578; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) 1579; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1580; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 1581; 1582; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4 1583; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 1584; NVPTX-DISABLED1-NEXT: entry: 
; NVPTX-DISABLED1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
; NVPTX-DISABLED1-NEXT:    br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1:       for.cond:
; NVPTX-DISABLED1-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED1-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED1:       for.cond.cleanup:
; NVPTX-DISABLED1-NEXT:    call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT:    ret void
; NVPTX-DISABLED1:       for.body:
; NVPTX-DISABLED1-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED1-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT:  entry:
; NVPTX-DISABLED2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
; NVPTX-DISABLED2-NEXT:    br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2:       for.cond:
; NVPTX-DISABLED2-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED2-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED2:       for.cond.cleanup:
; NVPTX-DISABLED2-NEXT:    call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT:    ret void
; NVPTX-DISABLED2:       for.body:
; NVPTX-DISABLED2-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED2-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
entry:
  %captured_vars_addrs = alloca [1 x ptr], align 8
  %x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, 100
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond
  call void @spmd_amenable() #10
  call void @__kmpc_free_shared(ptr %x, i64 4)
  ret void

for.body:                                         ; preds = %for.cond
  store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26
  %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
  call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 1)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond, !llvm.loop !28
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-NEXT:  entry:
; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT:    call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT:    ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-NEXT:  entry:
; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT:    call void @unknown() #[[ATTR8]]
; NVPTX-NEXT:    ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED1-NEXT:  entry:
; AMDGPU-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-DISABLED1-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT:    call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED1-NEXT:    ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED2-NEXT:  entry:
; AMDGPU-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-DISABLED2-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT:    call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED2-NEXT:    ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED1-NEXT:  entry:
; NVPTX-DISABLED1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-DISABLED1-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT:    call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED1-NEXT:    ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED2-NEXT:  entry:
; NVPTX-DISABLED2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-DISABLED2-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT:    call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED2-NEXT:    ret void
entry:
  %0 = load i32, ptr %x, align 4, !tbaa !18
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %x, align 4, !tbaa !18
  call void @unknown() #11
  ret void
}

; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT:  entry:
; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
; AMDGPU-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-NEXT:    ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT:  entry:
; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
; NVPTX-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT:    ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED1-NEXT:  entry:
; AMDGPU-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT:    ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED2-NEXT:  entry:
; AMDGPU-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT:    ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED1-NEXT:  entry:
; NVPTX-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT:    ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED2-NEXT:  entry:
; NVPTX-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT:    ret void
entry:
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca ptr, align 8
  store i32 %1, ptr %.addr1, align 4, !tbaa !18
  store i32 0, ptr %.zero.addr, align 4
  call void @__kmpc_get_shared_variables(ptr %global_args)
  %2 = load ptr, ptr %global_args, align 8
  %3 = load ptr, ptr %2, align 8, !tbaa !26
  call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr, ptr %3) #6
  ret void
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT:  entry:
; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU:       common.ret:
; AMDGPU-NEXT:    ret void
; AMDGPU:       user_code.entry:
; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT:    call void @__kmpc_target_deinit()
; AMDGPU-NEXT:    br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; NVPTX-SAME: () #[[ATTR0]] {
;
NVPTX-NEXT: entry: 1813; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1814; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1815; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) 1816; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1817; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1818; NVPTX: common.ret: 1819; NVPTX-NEXT: ret void 1820; NVPTX: user_code.entry: 1821; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 1822; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 1823; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1824; NVPTX-NEXT: call void @__kmpc_target_deinit() 1825; NVPTX-NEXT: br label [[COMMON_RET]] 1826; 1827; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 1828; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { 1829; AMDGPU-DISABLED1-NEXT: entry: 1830; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1831; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1832; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1833; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) 1834; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1835; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1836; AMDGPU-DISABLED1: is_worker_check: 1837; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1838; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1839; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1840; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1841; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1842; AMDGPU-DISABLED1: worker_state_machine.begin: 1843; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1844; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 1845; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 1846; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1847; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1848; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1849; AMDGPU-DISABLED1: worker_state_machine.finished: 1850; AMDGPU-DISABLED1-NEXT: ret void 1851; AMDGPU-DISABLED1: worker_state_machine.is_active.check: 1852; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1853; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: 1854; AMDGPU-DISABLED1-NEXT: br i1 true, label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1855; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: 1856; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) 1857; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1858; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check1: 1859; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1860; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: 1861; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() 1862; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1863; AMDGPU-DISABLED1: worker_state_machine.done.barrier: 1864; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1865; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1866; AMDGPU-DISABLED1: thread.user_code.check: 1867; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1868; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1869; AMDGPU-DISABLED1: common.ret: 1870; AMDGPU-DISABLED1-NEXT: ret void 1871; AMDGPU-DISABLED1: user_code.entry: 1872; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 1873; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 1874; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1875; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() 1876; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] 1877; 1878; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 1879; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { 1880; AMDGPU-DISABLED2-NEXT: entry: 1881; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1882; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1883; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1884; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) 1885; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1886; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1887; AMDGPU-DISABLED2: is_worker_check: 1888; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1889; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1890; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1891; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1892; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1893; AMDGPU-DISABLED2: worker_state_machine.begin: 1894; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1895; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 1896; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr 
[[WORKER_WORK_FN_ADDR_GENERIC]]) 1897; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1898; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1899; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1900; AMDGPU-DISABLED2: worker_state_machine.finished: 1901; AMDGPU-DISABLED2-NEXT: ret void 1902; AMDGPU-DISABLED2: worker_state_machine.is_active.check: 1903; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1904; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check: 1905; AMDGPU-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1906; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute: 1907; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) 1908; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1909; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check1: 1910; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1911; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end: 1912; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() 1913; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1914; AMDGPU-DISABLED2: worker_state_machine.done.barrier: 1915; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1916; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1917; AMDGPU-DISABLED2: thread.user_code.check: 1918; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1919; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1920; AMDGPU-DISABLED2: common.ret: 1921; AMDGPU-DISABLED2-NEXT: ret void 1922; AMDGPU-DISABLED2: user_code.entry: 1923; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 1924; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 1925; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1926; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() 1927; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] 1928; 1929; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 1930; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { 1931; NVPTX-DISABLED1-NEXT: entry: 1932; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 1933; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1934; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1935; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) 1936; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1937; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1938; NVPTX-DISABLED1: is_worker_check: 1939; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1940; NVPTX-DISABLED1-NEXT: 
[[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1941; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1942; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1943; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1944; NVPTX-DISABLED1: worker_state_machine.begin: 1945; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1946; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 1947; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 1948; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1949; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1950; NVPTX-DISABLED1: worker_state_machine.finished: 1951; NVPTX-DISABLED1-NEXT: ret void 1952; NVPTX-DISABLED1: worker_state_machine.is_active.check: 1953; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1954; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: 1955; NVPTX-DISABLED1-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 1956; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: 1957; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) 1958; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1959; NVPTX-DISABLED1: worker_state_machine.parallel_region.check1: 1960; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1961; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: 1962; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() 1963; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1964; NVPTX-DISABLED1: worker_state_machine.done.barrier: 1965; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1966; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1967; NVPTX-DISABLED1: thread.user_code.check: 1968; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1969; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1970; NVPTX-DISABLED1: common.ret: 1971; NVPTX-DISABLED1-NEXT: ret void 1972; NVPTX-DISABLED1: user_code.entry: 1973; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 1974; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 1975; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 1976; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() 1977; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] 1978; 1979; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 1980; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { 1981; NVPTX-DISABLED2-NEXT: entry: 1982; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 1983; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1984; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = 
alloca i32, align 4 1985; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) 1986; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1987; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1988; NVPTX-DISABLED2: is_worker_check: 1989; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1990; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1991; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1992; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1993; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1994; NVPTX-DISABLED2: worker_state_machine.begin: 1995; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 1996; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 1997; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 1998; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 1999; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2000; NVPTX-DISABLED2: worker_state_machine.finished: 2001; NVPTX-DISABLED2-NEXT: ret void 2002; NVPTX-DISABLED2: worker_state_machine.is_active.check: 2003; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2004; NVPTX-DISABLED2: worker_state_machine.parallel_region.check: 2005; NVPTX-DISABLED2-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 2006; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute: 2007; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) 2008; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2009; NVPTX-DISABLED2: worker_state_machine.parallel_region.check1: 2010; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2011; NVPTX-DISABLED2: worker_state_machine.parallel_region.end: 2012; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() 2013; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2014; NVPTX-DISABLED2: worker_state_machine.done.barrier: 2015; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2016; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2017; NVPTX-DISABLED2: thread.user_code.check: 2018; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2019; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2020; NVPTX-DISABLED2: common.ret: 2021; NVPTX-DISABLED2-NEXT: ret void 2022; NVPTX-DISABLED2: user_code.entry: 2023; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 2024; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]] 2025; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr 
[[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT:    call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT:    br label [[COMMON_RET]]
entry:
  %.zero.addr = alloca i32, align 4
  %.threadid_temp. = alloca i32, align 4
  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %common.ret

common.ret:                                       ; preds = %entry, %user_code.entry
  ret void

user_code.entry:                                  ; preds = %entry
  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
  store i32 0, ptr %.zero.addr, align 4
  store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
  call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #6
  call void @__kmpc_target_deinit()
  br label %common.ret
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT:  entry:
; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
; AMDGPU-NEXT:    br label [[REGION_CHECK_TID:%.*]]
; AMDGPU:       region.check.tid:
; AMDGPU-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
; AMDGPU-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU-NEXT:    br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; AMDGPU:       region.guarded:
; AMDGPU-NEXT:    store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT:    br label [[REGION_GUARDED_END:%.*]]
; AMDGPU:       region.guarded.end:
; AMDGPU-NEXT:    br label [[REGION_BARRIER]]
; AMDGPU:       region.barrier:
; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
; AMDGPU-NEXT:    br label [[REGION_EXIT:%.*]]
; AMDGPU:       region.exit:
; AMDGPU-NEXT:    br label [[FOR_COND:%.*]]
; AMDGPU:       for.cond:
; AMDGPU-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU:       for.cond.cleanup:
; AMDGPU-NEXT:    call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT:    ret void
; AMDGPU:       for.body:
; AMDGPU-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
; AMDGPU-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT:  entry:
; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
; NVPTX-NEXT:    br label [[REGION_CHECK_TID:%.*]]
; NVPTX:       region.check.tid:
; NVPTX-NEXT:    [[TMP0:%.*]] =
call fastcc i32 @__kmpc_get_hardware_thread_id_in_block() 2089; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 2090; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] 2091; NVPTX: region.guarded: 2092; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]] 2093; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]] 2094; NVPTX: region.guarded.end: 2095; NVPTX-NEXT: br label [[REGION_BARRIER]] 2096; NVPTX: region.barrier: 2097; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]]) 2098; NVPTX-NEXT: br label [[REGION_EXIT:%.*]] 2099; NVPTX: region.exit: 2100; NVPTX-NEXT: br label [[FOR_COND:%.*]] 2101; NVPTX: for.cond: 2102; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 2103; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 2104; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 2105; NVPTX: for.cond.cleanup: 2106; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] 2107; NVPTX-NEXT: ret void 2108; NVPTX: for.body: 2109; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] 2110; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 2111; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) 2112; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 2113; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] 2114; 2115; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6 2116; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 2117; AMDGPU-DISABLED1-NEXT: entry: 2118; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 2119; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]] 2120; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] 2121; AMDGPU-DISABLED1: for.cond: 2122; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 2123; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 2124; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 2125; AMDGPU-DISABLED1: for.cond.cleanup: 2126; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] 2127; AMDGPU-DISABLED1-NEXT: ret void 2128; AMDGPU-DISABLED1: for.body: 2129; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] 2130; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 2131; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) 2132; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 2133; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] 2134; 2135; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6 2136; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 2137; AMDGPU-DISABLED2-NEXT: entry: 2138; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 2139; 
AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]] 2140; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] 2141; AMDGPU-DISABLED2: for.cond: 2142; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 2143; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 2144; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 2145; AMDGPU-DISABLED2: for.cond.cleanup: 2146; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] 2147; AMDGPU-DISABLED2-NEXT: ret void 2148; AMDGPU-DISABLED2: for.body: 2149; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] 2150; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 2151; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) 2152; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 2153; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] 2154; 2155; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6 2156; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 2157; NVPTX-DISABLED1-NEXT: entry: 2158; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 2159; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]] 2160; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]] 2161; NVPTX-DISABLED1: for.cond: 2162; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 2163; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 2164; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 2165; NVPTX-DISABLED1: for.cond.cleanup: 2166; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] 2167; NVPTX-DISABLED1-NEXT: ret void 2168; NVPTX-DISABLED1: for.body: 2169; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] 2170; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 2171; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) 2172; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 2173; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] 2174; 2175; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6 2176; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 2177; NVPTX-DISABLED2-NEXT: entry: 2178; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 2179; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]] 2180; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]] 2181; NVPTX-DISABLED2: for.cond: 2182; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 2183; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 2184; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label 
[[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 2185; NVPTX-DISABLED2: for.cond.cleanup: 2186; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] 2187; NVPTX-DISABLED2-NEXT: ret void 2188; NVPTX-DISABLED2: for.body: 2189; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]] 2190; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]] 2191; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) 2192; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 2193; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] 2194entry: 2195 %captured_vars_addrs = alloca [1 x ptr], align 8 2196 %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) 2197 store i32 42, ptr %x, align 4, !tbaa !18 2198 br label %for.cond 2199 2200for.cond: ; preds = %for.body, %entry 2201 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 2202 %cmp = icmp slt i32 %i.0, 100 2203 br i1 %cmp, label %for.body, label %for.cond.cleanup 2204 2205for.cond.cleanup: ; preds = %for.cond 2206 call void @spmd_amenable() #10 2207 call void @__kmpc_free_shared(ptr %x, i64 4) 2208 ret void 2209 2210for.body: ; preds = %for.cond 2211 store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26 2212 %0 = load i32, ptr %.global_tid., align 4, !tbaa !18 2213 call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 1) 2214 %inc = add nsw i32 %i.0, 1 2215 br label %for.cond, !llvm.loop !29 2216} 2217 2218; Function Attrs: alwaysinline convergent norecurse nounwind 2219define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) { 2220; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 2221; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { 2222; AMDGPU-NEXT: entry: 2223; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] 2224; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 2225; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] 2226; AMDGPU-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] 2227; AMDGPU-NEXT: ret void 2228; 2229; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 2230; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { 2231; NVPTX-NEXT: entry: 2232; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] 2233; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 2234; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] 2235; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] 2236; NVPTX-NEXT: ret void 2237; 2238; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7 2239; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { 2240; AMDGPU-DISABLED1-NEXT: entry: 2241; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] 2242; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 2243; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] 2244; AMDGPU-DISABLED1-NEXT: 
call void @unknowni32p(ptr [[X]]) #[[ATTR8]] 2245; AMDGPU-DISABLED1-NEXT: ret void 2246; 2247; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7 2248; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { 2249; AMDGPU-DISABLED2-NEXT: entry: 2250; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] 2251; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 2252; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] 2253; AMDGPU-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] 2254; AMDGPU-DISABLED2-NEXT: ret void 2255; 2256; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7 2257; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { 2258; NVPTX-DISABLED1-NEXT: entry: 2259; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] 2260; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 2261; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] 2262; NVPTX-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] 2263; NVPTX-DISABLED1-NEXT: ret void 2264; 2265; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7 2266; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) { 2267; NVPTX-DISABLED2-NEXT: entry: 2268; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA12]] 2269; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 2270; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA12]] 2271; NVPTX-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] 2272; NVPTX-DISABLED2-NEXT: ret void 2273entry: 2274 %0 = load i32, ptr %x, align 4, !tbaa !18 2275 %inc = add nsw i32 %0, 1 2276 store i32 %inc, ptr %x, align 4, !tbaa !18 2277 call void @unknowni32p(ptr %x) #11 2278 ret void 2279} 2280 2281; Function Attrs: convergent norecurse nounwind 2282define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { 2283; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 2284; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 2285; AMDGPU-NEXT: entry: 2286; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2287; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2288; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2289; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2290; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 2291; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] 2292; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] 2293; AMDGPU-NEXT: ret void 2294; 2295; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 2296; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 2297; NVPTX-NEXT: entry: 2298; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2299; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2300; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 2301; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 2302; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 2303; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] 
; NVPTX-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT:    ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED1-NEXT:  entry:
; AMDGPU-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED1-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT:    ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED2-NEXT:  entry:
; AMDGPU-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; AMDGPU-DISABLED2-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT:    ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED1-NEXT:  entry:
; NVPTX-DISABLED1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED1-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT:    ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED2-NEXT:  entry:
; NVPTX-DISABLED2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED2-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
; NVPTX-DISABLED2-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT:    ret void
entry:
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca ptr, align 8
  store i32 %1, ptr %.addr1, align 4, !tbaa !18
  store i32 0, ptr %.zero.addr, align 4
  call void @__kmpc_get_shared_variables(ptr %global_args)
  %2 = load ptr, ptr %global_args, align 8
  %3 = load ptr, ptr %2, align 8, !tbaa !26
  call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr, ptr %3) #6
  ret void
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT:  entry:
; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU:       is_worker_check:
; AMDGPU-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU:       worker_state_machine.begin:
; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU:       worker_state_machine.finished:
; AMDGPU-NEXT:    ret void
; AMDGPU:       worker_state_machine.is_active.check:
; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU:       worker_state_machine.parallel_region.fallback.execute:
; AMDGPU-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU:       worker_state_machine.parallel_region.end:
; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU:       worker_state_machine.done.barrier:
; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU:       thread.user_code.check:
; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
2407; AMDGPU: common.ret: 2408; AMDGPU-NEXT: ret void 2409; AMDGPU: user_code.entry: 2410; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 2411; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 2412; AMDGPU-NEXT: call void @__kmpc_target_deinit() 2413; AMDGPU-NEXT: br label [[COMMON_RET]] 2414; 2415; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 2416; NVPTX-SAME: () #[[ATTR0]] { 2417; NVPTX-NEXT: entry: 2418; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 2419; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2420; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2421; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) 2422; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2423; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2424; NVPTX: is_worker_check: 2425; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2426; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2427; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2428; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2429; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2430; NVPTX: worker_state_machine.begin: 2431; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2432; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 2433; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 2434; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2435; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2436; NVPTX: worker_state_machine.finished: 2437; NVPTX-NEXT: ret void 2438; NVPTX: worker_state_machine.is_active.check: 2439; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2440; NVPTX: worker_state_machine.parallel_region.fallback.execute: 2441; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2442; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2443; NVPTX: worker_state_machine.parallel_region.end: 2444; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() 2445; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2446; NVPTX: worker_state_machine.done.barrier: 2447; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2448; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2449; NVPTX: thread.user_code.check: 2450; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2451; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2452; NVPTX: common.ret: 2453; NVPTX-NEXT: ret void 2454; NVPTX: user_code.entry: 2455; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 2456; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 2457; NVPTX-NEXT: call void @__kmpc_target_deinit() 2458; NVPTX-NEXT: 
br label [[COMMON_RET]] 2459; 2460; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 2461; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { 2462; AMDGPU-DISABLED1-NEXT: entry: 2463; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 2464; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2465; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2466; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) 2467; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2468; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2469; AMDGPU-DISABLED1: is_worker_check: 2470; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2471; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2472; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2473; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2474; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2475; AMDGPU-DISABLED1: worker_state_machine.begin: 2476; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2477; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 2478; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 2479; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 2480; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2481; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2482; AMDGPU-DISABLED1: worker_state_machine.finished: 2483; AMDGPU-DISABLED1-NEXT: ret void 2484; AMDGPU-DISABLED1: worker_state_machine.is_active.check: 2485; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2486; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute: 2487; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2488; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2489; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: 2490; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() 2491; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2492; AMDGPU-DISABLED1: worker_state_machine.done.barrier: 2493; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2494; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2495; AMDGPU-DISABLED1: thread.user_code.check: 2496; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2497; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2498; AMDGPU-DISABLED1: common.ret: 2499; AMDGPU-DISABLED1-NEXT: ret void 2500; AMDGPU-DISABLED1: user_code.entry: 2501; AMDGPU-DISABLED1-NEXT: 
[[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 2502; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 2503; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() 2504; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] 2505; 2506; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 2507; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { 2508; AMDGPU-DISABLED2-NEXT: entry: 2509; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 2510; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2511; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2512; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) 2513; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2514; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2515; AMDGPU-DISABLED2: is_worker_check: 2516; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2517; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2518; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2519; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2520; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2521; AMDGPU-DISABLED2: worker_state_machine.begin: 2522; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2523; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 2524; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 2525; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 2526; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2527; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2528; AMDGPU-DISABLED2: worker_state_machine.finished: 2529; AMDGPU-DISABLED2-NEXT: ret void 2530; AMDGPU-DISABLED2: worker_state_machine.is_active.check: 2531; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2532; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute: 2533; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2534; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2535; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end: 2536; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() 2537; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2538; AMDGPU-DISABLED2: worker_state_machine.done.barrier: 2539; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2540; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2541; AMDGPU-DISABLED2: thread.user_code.check: 2542; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] 
= icmp eq i32 [[TMP0]], -1 2543; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2544; AMDGPU-DISABLED2: common.ret: 2545; AMDGPU-DISABLED2-NEXT: ret void 2546; AMDGPU-DISABLED2: user_code.entry: 2547; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 2548; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 2549; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() 2550; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] 2551; 2552; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 2553; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { 2554; NVPTX-DISABLED1-NEXT: entry: 2555; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 2556; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2557; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2558; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) 2559; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2560; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2561; NVPTX-DISABLED1: is_worker_check: 2562; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2563; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2564; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2565; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2566; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2567; NVPTX-DISABLED1: worker_state_machine.begin: 2568; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2569; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 2570; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 2571; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2572; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2573; NVPTX-DISABLED1: worker_state_machine.finished: 2574; NVPTX-DISABLED1-NEXT: ret void 2575; NVPTX-DISABLED1: worker_state_machine.is_active.check: 2576; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2577; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute: 2578; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2579; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2580; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: 2581; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() 2582; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2583; NVPTX-DISABLED1: worker_state_machine.done.barrier: 2584; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2585; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2586; 
NVPTX-DISABLED1: thread.user_code.check: 2587; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2588; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2589; NVPTX-DISABLED1: common.ret: 2590; NVPTX-DISABLED1-NEXT: ret void 2591; NVPTX-DISABLED1: user_code.entry: 2592; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 2593; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 2594; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() 2595; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] 2596; 2597; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 2598; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { 2599; NVPTX-DISABLED2-NEXT: entry: 2600; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 2601; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2602; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2603; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) 2604; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2605; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2606; NVPTX-DISABLED2: is_worker_check: 2607; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2608; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2609; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2610; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2611; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2612; NVPTX-DISABLED2: worker_state_machine.begin: 2613; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2614; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 2615; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 2616; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2617; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2618; NVPTX-DISABLED2: worker_state_machine.finished: 2619; NVPTX-DISABLED2-NEXT: ret void 2620; NVPTX-DISABLED2: worker_state_machine.is_active.check: 2621; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2622; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute: 2623; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2624; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2625; NVPTX-DISABLED2: worker_state_machine.parallel_region.end: 2626; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() 2627; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2628; NVPTX-DISABLED2: worker_state_machine.done.barrier: 2629; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 
[[TMP0]]) 2630; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2631; NVPTX-DISABLED2: thread.user_code.check: 2632; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2633; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2634; NVPTX-DISABLED2: common.ret: 2635; NVPTX-DISABLED2-NEXT: ret void 2636; NVPTX-DISABLED2: user_code.entry: 2637; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 2638; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 2639; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() 2640; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] 2641entry: 2642 %.zero.addr = alloca i32, align 4 2643 %.threadid_temp. = alloca i32, align 4 2644 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) 2645 %exec_user_code = icmp eq i32 %0, -1 2646 br i1 %exec_user_code, label %user_code.entry, label %common.ret 2647 2648common.ret: ; preds = %entry, %user_code.entry 2649 ret void 2650 2651user_code.entry: ; preds = %entry 2652 %1 = call i32 @__kmpc_global_thread_num(ptr @1) 2653 store i32 0, ptr %.zero.addr, align 4 2654 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 2655 call void @__omp_outlined__8(ptr %.threadid_temp., ptr %.zero.addr) #6 2656 call void @__kmpc_target_deinit() 2657 br label %common.ret 2658} 2659 2660; Function Attrs: alwaysinline convergent norecurse nounwind 2661define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { 2662; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 2663; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 2664; AMDGPU-NEXT: entry: 2665; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] 2666; AMDGPU-NEXT: ret void 2667; 2668; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 2669; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 2670; NVPTX-NEXT: entry: 2671; NVPTX-NEXT: call void @unknown() #[[ATTR8]] 2672; NVPTX-NEXT: ret void 2673; 2674; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8 2675; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 2676; AMDGPU-DISABLED1-NEXT: entry: 2677; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] 2678; AMDGPU-DISABLED1-NEXT: ret void 2679; 2680; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8 2681; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 2682; AMDGPU-DISABLED2-NEXT: entry: 2683; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] 2684; AMDGPU-DISABLED2-NEXT: ret void 2685; 2686; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8 2687; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 2688; NVPTX-DISABLED1-NEXT: entry: 2689; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] 2690; NVPTX-DISABLED1-NEXT: ret void 2691; 2692; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8 2693; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 2694; NVPTX-DISABLED2-NEXT: entry: 2695; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] 2696; NVPTX-DISABLED2-NEXT: ret void 2697entry: 2698 call void @unknown() #11 2699 ret void 2700} 2701 2702; Function Attrs: alwaysinline convergent norecurse nounwind 
2703define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { 2704; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2705; AMDGPU-SAME: () #[[ATTR0]] { 2706; AMDGPU-NEXT: entry: 2707; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 2708; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2709; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) 2710; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2711; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2712; AMDGPU: is_worker_check: 2713; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2714; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2715; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2716; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2717; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2718; AMDGPU: worker_state_machine.begin: 2719; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2720; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 2721; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 2722; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 2723; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2724; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2725; AMDGPU: worker_state_machine.finished: 2726; AMDGPU-NEXT: ret void 2727; AMDGPU: worker_state_machine.is_active.check: 2728; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2729; AMDGPU: worker_state_machine.parallel_region.check: 2730; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID 2731; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2732; AMDGPU: worker_state_machine.parallel_region.execute: 2733; AMDGPU-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2734; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2735; AMDGPU: worker_state_machine.parallel_region.fallback.execute: 2736; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2737; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2738; AMDGPU: worker_state_machine.parallel_region.end: 2739; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() 2740; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2741; AMDGPU: worker_state_machine.done.barrier: 2742; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2743; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2744; AMDGPU: thread.user_code.check: 2745; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2746; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2747; AMDGPU: common.ret: 2748; AMDGPU-NEXT: ret void 2749; AMDGPU: user_code.entry: 2750; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 2751; AMDGPU-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] 2752; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] 2753; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2754; AMDGPU-NEXT: call void @__kmpc_target_deinit() 2755; AMDGPU-NEXT: br label [[COMMON_RET]] 2756; 2757; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2758; NVPTX-SAME: () #[[ATTR0]] { 2759; NVPTX-NEXT: entry: 2760; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 2761; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2762; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) 2763; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2764; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2765; NVPTX: is_worker_check: 2766; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2767; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2768; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2769; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2770; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2771; NVPTX: worker_state_machine.begin: 2772; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2773; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 2774; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 2775; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2776; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2777; NVPTX: worker_state_machine.finished: 2778; NVPTX-NEXT: ret void 2779; NVPTX: worker_state_machine.is_active.check: 2780; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2781; NVPTX: worker_state_machine.parallel_region.check: 2782; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID 2783; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2784; NVPTX: worker_state_machine.parallel_region.execute: 2785; NVPTX-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2786; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2787; NVPTX: worker_state_machine.parallel_region.fallback.execute: 2788; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2789; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2790; NVPTX: 
worker_state_machine.parallel_region.end: 2791; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() 2792; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2793; NVPTX: worker_state_machine.done.barrier: 2794; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2795; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2796; NVPTX: thread.user_code.check: 2797; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2798; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2799; NVPTX: common.ret: 2800; NVPTX-NEXT: ret void 2801; NVPTX: user_code.entry: 2802; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 2803; NVPTX-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] 2804; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] 2805; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2806; NVPTX-NEXT: call void @__kmpc_target_deinit() 2807; NVPTX-NEXT: br label [[COMMON_RET]] 2808; 2809; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2810; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { 2811; AMDGPU-DISABLED1-NEXT: entry: 2812; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 2813; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2814; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) 2815; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2816; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2817; AMDGPU-DISABLED1: is_worker_check: 2818; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2819; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2820; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2821; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2822; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2823; AMDGPU-DISABLED1: worker_state_machine.begin: 2824; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2825; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 2826; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 2827; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 2828; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2829; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2830; AMDGPU-DISABLED1: worker_state_machine.finished: 2831; AMDGPU-DISABLED1-NEXT: ret void 2832; AMDGPU-DISABLED1: worker_state_machine.is_active.check: 2833; AMDGPU-DISABLED1-NEXT: br i1 
[[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2834; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: 2835; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID 2836; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2837; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: 2838; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2839; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2840; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute: 2841; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2842; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2843; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: 2844; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() 2845; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2846; AMDGPU-DISABLED1: worker_state_machine.done.barrier: 2847; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2848; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2849; AMDGPU-DISABLED1: thread.user_code.check: 2850; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2851; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2852; AMDGPU-DISABLED1: common.ret: 2853; AMDGPU-DISABLED1-NEXT: ret void 2854; AMDGPU-DISABLED1: user_code.entry: 2855; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 2856; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] 2857; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] 2858; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2859; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() 2860; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] 2861; 2862; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2863; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { 2864; AMDGPU-DISABLED2-NEXT: entry: 2865; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 2866; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2867; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) 2868; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2869; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2870; AMDGPU-DISABLED2: is_worker_check: 2871; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2872; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2873; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 
[[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2874; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2875; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2876; AMDGPU-DISABLED2: worker_state_machine.begin: 2877; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2878; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr 2879; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) 2880; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 2881; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2882; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2883; AMDGPU-DISABLED2: worker_state_machine.finished: 2884; AMDGPU-DISABLED2-NEXT: ret void 2885; AMDGPU-DISABLED2: worker_state_machine.is_active.check: 2886; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2887; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check: 2888; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID 2889; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2890; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute: 2891; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2892; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2893; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute: 2894; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2895; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2896; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end: 2897; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() 2898; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2899; AMDGPU-DISABLED2: worker_state_machine.done.barrier: 2900; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2901; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2902; AMDGPU-DISABLED2: thread.user_code.check: 2903; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2904; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2905; AMDGPU-DISABLED2: common.ret: 2906; AMDGPU-DISABLED2-NEXT: ret void 2907; AMDGPU-DISABLED2: user_code.entry: 2908; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 2909; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] 2910; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] 2911; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr 
@__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2912; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() 2913; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] 2914; 2915; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2916; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { 2917; NVPTX-DISABLED1-NEXT: entry: 2918; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 2919; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2920; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) 2921; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2922; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2923; NVPTX-DISABLED1: is_worker_check: 2924; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2925; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2926; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2927; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2928; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2929; NVPTX-DISABLED1: worker_state_machine.begin: 2930; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2931; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 2932; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 2933; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2934; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2935; NVPTX-DISABLED1: worker_state_machine.finished: 2936; NVPTX-DISABLED1-NEXT: ret void 2937; NVPTX-DISABLED1: worker_state_machine.is_active.check: 2938; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2939; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: 2940; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID 2941; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2942; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: 2943; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2944; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2945; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute: 2946; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2947; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2948; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: 2949; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() 2950; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2951; NVPTX-DISABLED1: worker_state_machine.done.barrier: 2952; 
NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2953; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2954; NVPTX-DISABLED1: thread.user_code.check: 2955; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2956; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2957; NVPTX-DISABLED1: common.ret: 2958; NVPTX-DISABLED1-NEXT: ret void 2959; NVPTX-DISABLED1: user_code.entry: 2960; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 2961; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] 2962; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] 2963; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 2964; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() 2965; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] 2966; 2967; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2968; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { 2969; NVPTX-DISABLED2-NEXT: entry: 2970; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 2971; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 2972; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) 2973; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2974; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2975; NVPTX-DISABLED2: is_worker_check: 2976; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2977; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2978; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2979; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2980; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2981; NVPTX-DISABLED2: worker_state_machine.begin: 2982; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 2983; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 2984; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 2985; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 2986; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2987; NVPTX-DISABLED2: worker_state_machine.finished: 2988; NVPTX-DISABLED2-NEXT: ret void 2989; NVPTX-DISABLED2: worker_state_machine.is_active.check: 2990; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2991; NVPTX-DISABLED2: worker_state_machine.parallel_region.check: 2992; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = 
icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__9_wrapper.ID 2993; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2994; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute: 2995; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2996; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2997; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute: 2998; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 2999; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 3000; NVPTX-DISABLED2: worker_state_machine.parallel_region.end: 3001; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() 3002; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 3003; NVPTX-DISABLED2: worker_state_machine.done.barrier: 3004; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 3005; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 3006; NVPTX-DISABLED2: thread.user_code.check: 3007; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3008; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 3009; NVPTX-DISABLED2: common.ret: 3010; NVPTX-DISABLED2-NEXT: ret void 3011; NVPTX-DISABLED2: user_code.entry: 3012; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] 3013; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] 3014; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] 3015; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 3016; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() 3017; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] 3018entry: 3019 %captured_vars_addrs = alloca [0 x ptr], align 8 3020 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) 3021 %exec_user_code = icmp eq i32 %0, -1 3022 br i1 %exec_user_code, label %user_code.entry, label %common.ret 3023 3024common.ret: ; preds = %entry, %user_code.entry 3025 ret void 3026 3027user_code.entry: ; preds = %entry 3028 %1 = call i32 @__kmpc_global_thread_num(ptr @1) 3029 %2 = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %1, i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") 3030 %3 = call i32 @__kmpc_omp_task(ptr @1, i32 %1, ptr %2) 3031 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr %captured_vars_addrs, i64 0) 3032 call void @__kmpc_target_deinit() 3033 br label %common.ret 3034} 3035 3036; Function Attrs: alwaysinline convergent nounwind 3037define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ptr noalias %.privates., ptr noalias %.copy_fn., ptr %.task_t., ptr noalias %__context) #9 { 3038; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined. 
3039; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { 3040; AMDGPU-NEXT: entry: 3041; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] 3042; AMDGPU-NEXT: ret void 3043; 3044; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined. 3045; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { 3046; NVPTX-NEXT: entry: 3047; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] 3048; NVPTX-NEXT: ret void 3049; 3050; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined. 3051; AMDGPU-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { 3052; AMDGPU-DISABLED1-NEXT: entry: 3053; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] 3054; AMDGPU-DISABLED1-NEXT: ret void 3055; 3056; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined. 3057; AMDGPU-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { 3058; AMDGPU-DISABLED2-NEXT: entry: 3059; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] 3060; AMDGPU-DISABLED2-NEXT: ret void 3061; 3062; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined. 3063; NVPTX-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { 3064; NVPTX-DISABLED1-NEXT: entry: 3065; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]] 3066; NVPTX-DISABLED1-NEXT: ret void 3067; 3068; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined. 
3069; NVPTX-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { 3070; NVPTX-DISABLED2-NEXT: entry: 3071; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]] 3072; NVPTX-DISABLED2-NEXT: ret void 3073entry: 3074 call void @spmd_amenable() #10 3075 ret void 3076} 3077 3078; Function Attrs: convergent norecurse nounwind 3079define internal i32 @"_omp_task_entry$"(i32 %0, ptr noalias %1) #3 { 3080entry: 3081 %2 = getelementptr inbounds %struct.kmp_task_t, ptr %1, i32 0, i32 2 3082 %3 = load ptr, ptr %1, align 8, !tbaa !30 3083 call void @.omp_outlined.(i32 %0, ptr %2, ptr null, ptr null, ptr %1, ptr %3) #6 3084 ret i32 0 3085} 3086 3087; Function Attrs: nounwind 3088declare ptr @__kmpc_omp_task_alloc(ptr, i32, i32, i64, i64, ptr) #6 3089 3090; Function Attrs: nounwind 3091declare i32 @__kmpc_omp_task(ptr, i32, ptr) #6 3092 3093; Function Attrs: nosync nounwind 3094declare void @__kmpc_free_shared(ptr nocapture, i64) #8 3095 3096; Function Attrs: nofree nosync nounwind 3097declare ptr @__kmpc_alloc_shared(i64) #7 3098 3099; Function Attrs: convergent 3100declare void @use(ptr nocapture) #5 3101 3102; Function Attrs: convergent 3103declare void @unknown() #2 3104declare void @unknowni32p(ptr) #2 3105 3106; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn 3107declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 3108 3109; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. 3110define weak i32 @__kmpc_target_init(ptr, ptr) { 3111; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init 3112; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { 3113; AMDGPU-NEXT: ret i32 0 3114; 3115; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init 3116; NVPTX-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { 3117; NVPTX-NEXT: ret i32 0 3118; 3119; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init 3120; AMDGPU-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { 3121; AMDGPU-DISABLED1-NEXT: ret i32 0 3122; 3123; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init 3124; AMDGPU-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { 3125; AMDGPU-DISABLED2-NEXT: ret i32 0 3126; 3127; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init 3128; NVPTX-DISABLED1-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { 3129; NVPTX-DISABLED1-NEXT: ret i32 0 3130; 3131; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init 3132; NVPTX-DISABLED2-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { 3133; NVPTX-DISABLED2-NEXT: ret i32 0 3134 ret i32 0 3135} 3136 3137declare void @__kmpc_get_shared_variables(ptr) 3138 3139; Function Attrs: alwaysinline 3140declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #4 3141 3142; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn 3143declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 3144 3145; Function Attrs: convergent 3146declare void @spmd_amenable() #5 3147 3148; Function Attrs: nounwind 3149declare i32 @__kmpc_global_thread_num(ptr) #6 3150 3151declare void @__kmpc_target_deinit() 3152 3153 3154; Function Attrs: alwaysinline convergent norecurse nounwind 3155define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
{ 3156; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 3157; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 3158; AMDGPU-NEXT: entry: 3159; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] 3160; AMDGPU-NEXT: ret void 3161; 3162; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 3163; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 3164; NVPTX-NEXT: entry: 3165; NVPTX-NEXT: call void @unknown() #[[ATTR8]] 3166; NVPTX-NEXT: ret void 3167; 3168; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9 3169; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 3170; AMDGPU-DISABLED1-NEXT: entry: 3171; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] 3172; AMDGPU-DISABLED1-NEXT: ret void 3173; 3174; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9 3175; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 3176; AMDGPU-DISABLED2-NEXT: entry: 3177; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] 3178; AMDGPU-DISABLED2-NEXT: ret void 3179; 3180; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9 3181; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 3182; NVPTX-DISABLED1-NEXT: entry: 3183; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]] 3184; NVPTX-DISABLED1-NEXT: ret void 3185; 3186; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9 3187; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { 3188; NVPTX-DISABLED2-NEXT: entry: 3189; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]] 3190; NVPTX-DISABLED2-NEXT: ret void 3191; 3192entry: 3193 call void @unknown() #11 3194 ret void 3195} 3196 3197; Function Attrs: convergent norecurse nounwind 3198define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { 3199; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper 3200; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 3201; AMDGPU-NEXT: entry: 3202; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3203; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3204; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3205; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3206; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 3207; AMDGPU-NEXT: ret void 3208; 3209; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper 3210; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 3211; NVPTX-NEXT: entry: 3212; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3213; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3214; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3215; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3216; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 3217; NVPTX-NEXT: ret void 3218; 3219; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper 3220; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 3221; AMDGPU-DISABLED1-NEXT: entry: 3222; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3223; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3224; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3225; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3226; 
AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 3227; AMDGPU-DISABLED1-NEXT: ret void 3228; 3229; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper 3230; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 3231; AMDGPU-DISABLED2-NEXT: entry: 3232; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3233; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3234; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3235; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3236; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 3237; AMDGPU-DISABLED2-NEXT: ret void 3238; 3239; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper 3240; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 3241; NVPTX-DISABLED1-NEXT: entry: 3242; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3243; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3244; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3245; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3246; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 3247; NVPTX-DISABLED1-NEXT: ret void 3248; 3249; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper 3250; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { 3251; NVPTX-DISABLED2-NEXT: entry: 3252; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3253; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3254; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 3255; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 3256; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] 3257; NVPTX-DISABLED2-NEXT: ret void 3258entry: 3259 %.addr1 = alloca i32, align 4 3260 %.zero.addr = alloca i32, align 4 3261 %global_args = alloca ptr, align 8 3262 store i32 %1, ptr %.addr1, align 4, !tbaa !18 3263 store i32 0, ptr %.zero.addr, align 4 3264 call void @__kmpc_get_shared_variables(ptr %global_args) 3265 call void @__omp_outlined__9(ptr %.addr1, ptr %.zero.addr) #6 3266 ret void 3267} 3268 3269declare fastcc i32 @__kmpc_get_hardware_thread_id_in_block(); 3270 3271attributes #0 = { alwaysinline convergent norecurse nounwind "kernel" } 3272attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn } 3273attributes #2 = { convergent } 3274attributes #3 = { convergent norecurse nounwind } 3275attributes #4 = { alwaysinline } 3276attributes #5 = { convergent "llvm.assume"="ompx_spmd_amenable" } 3277attributes #6 = { nounwind } 3278attributes #7 = { nofree nosync nounwind } 3279attributes #8 = { nosync nounwind } 3280attributes #9 = { alwaysinline convergent nounwind } 3281attributes #10 = { convergent "llvm.assume"="ompx_spmd_amenable" } 3282attributes #11 = { convergent } 3283 3284!omp_offload.info = !{!0, !1, !2, !3, !4, !5} 3285!llvm.module.flags = !{!12, !13, !14, !15, !16} 3286!llvm.ident = !{!17} 3287 3288!0 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} 3289!1 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} 3290!2 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} 3291!3 = 
!{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} 3292!4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} 3293!5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} 3294!12 = !{i32 1, !"wchar_size", i32 4} 3295!13 = !{i32 7, !"openmp", i32 50} 3296!14 = !{i32 7, !"openmp-device", i32 50} 3297!15 = !{i32 8, !"PIC Level", i32 2} 3298!16 = !{i32 7, !"frame-pointer", i32 2} 3299!17 = !{!"clang version 14.0.0"} 3300!18 = !{!19, !19, i64 0} 3301!19 = !{!"int", !20, i64 0} 3302!20 = !{!"omnipotent char", !21, i64 0} 3303!21 = !{!"Simple C/C++ TBAA"} 3304!22 = distinct !{!22, !23, !24} 3305!23 = !{!"llvm.loop.mustprogress"} 3306!24 = !{!"llvm.loop.unroll.disable"} 3307!25 = distinct !{!25, !23, !24} 3308!26 = !{!27, !27, i64 0} 3309!27 = !{!"any pointer", !20, i64 0} 3310!28 = distinct !{!28, !23, !24} 3311!29 = distinct !{!29, !23, !24} 3312!30 = !{!31, !27, i64 0} 3313!31 = !{!"kmp_task_t_with_privates", !32, i64 0} 3314!32 = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32} 3315;. 3316; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" } 3317; AMDGPU: attributes #[[ATTR1]] = { norecurse } 3318; AMDGPU: attributes #[[ATTR2]] = { convergent norecurse nounwind } 3319; AMDGPU: attributes #[[ATTR3]] = { alwaysinline convergent nounwind } 3320; AMDGPU: attributes #[[ATTR4]] = { nounwind } 3321; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } 3322; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } 3323; AMDGPU: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } 3324; AMDGPU: attributes #[[ATTR8]] = { convergent } 3325; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } 3326; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } 3327; AMDGPU: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } 3328;. 3329; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" } 3330; NVPTX: attributes #[[ATTR1]] = { norecurse } 3331; NVPTX: attributes #[[ATTR2]] = { convergent norecurse nounwind } 3332; NVPTX: attributes #[[ATTR3]] = { alwaysinline convergent nounwind } 3333; NVPTX: attributes #[[ATTR4]] = { nounwind } 3334; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } 3335; NVPTX: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } 3336; NVPTX: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } 3337; NVPTX: attributes #[[ATTR8]] = { convergent } 3338; NVPTX: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } 3339; NVPTX: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } 3340; NVPTX: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } 3341;. 
; AMDGPU-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; AMDGPU-DISABLED1: attributes #[[ATTR1]] = { norecurse }
; AMDGPU-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; AMDGPU-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; AMDGPU-DISABLED1: attributes #[[ATTR4]] = { nounwind }
; AMDGPU-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; AMDGPU-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; AMDGPU-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU-DISABLED1: attributes #[[ATTR8]] = { convergent }
; AMDGPU-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; AMDGPU-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; AMDGPU-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; AMDGPU-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; AMDGPU-DISABLED2: attributes #[[ATTR1]] = { norecurse }
; AMDGPU-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; AMDGPU-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; AMDGPU-DISABLED2: attributes #[[ATTR4]] = { nounwind }
; AMDGPU-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; AMDGPU-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; AMDGPU-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU-DISABLED2: attributes #[[ATTR8]] = { convergent }
; AMDGPU-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; AMDGPU-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; AMDGPU-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; NVPTX-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; NVPTX-DISABLED1: attributes #[[ATTR1]] = { norecurse }
; NVPTX-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; NVPTX-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; NVPTX-DISABLED1: attributes #[[ATTR4]] = { nounwind }
; NVPTX-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; NVPTX-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; NVPTX-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; NVPTX-DISABLED1: attributes #[[ATTR8]] = { convergent }
; NVPTX-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; NVPTX-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; NVPTX-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; NVPTX-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; NVPTX-DISABLED2: attributes #[[ATTR1]] = { norecurse }
; NVPTX-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; NVPTX-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; NVPTX-DISABLED2: attributes #[[ATTR4]] = { nounwind }
; NVPTX-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; NVPTX-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; NVPTX-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; NVPTX-DISABLED2: attributes #[[ATTR8]] = { convergent }
; NVPTX-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; NVPTX-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; NVPTX-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; AMDGPU: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; AMDGPU: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; AMDGPU: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; AMDGPU: [[META15]] = !{!"Simple C/C++ TBAA"}
; AMDGPU: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
; AMDGPU: [[META17]] = !{!"llvm.loop.mustprogress"}
; AMDGPU: [[META18]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
; AMDGPU: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; NVPTX: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; NVPTX: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; NVPTX: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; NVPTX: [[META15]] = !{!"Simple C/C++ TBAA"}
; NVPTX: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
; NVPTX: [[META17]] = !{!"llvm.loop.mustprogress"}
; NVPTX: [[META18]] = !{!"llvm.loop.unroll.disable"}
; NVPTX: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
; NVPTX: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; AMDGPU-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; AMDGPU-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; AMDGPU-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; AMDGPU-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; AMDGPU-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
; AMDGPU-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
; AMDGPU-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
; AMDGPU-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
; AMDGPU-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; AMDGPU-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; AMDGPU-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; AMDGPU-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; AMDGPU-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; AMDGPU-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; AMDGPU-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; AMDGPU-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
; AMDGPU-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
; AMDGPU-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
; AMDGPU-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
; AMDGPU-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; AMDGPU-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; AMDGPU-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; NVPTX-DISABLED1: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; NVPTX-DISABLED1: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; NVPTX-DISABLED1: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; NVPTX-DISABLED1: [[META15]] = !{!"Simple C/C++ TBAA"}
; NVPTX-DISABLED1: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
; NVPTX-DISABLED1: [[META17]] = !{!"llvm.loop.mustprogress"}
; NVPTX-DISABLED1: [[META18]] = !{!"llvm.loop.unroll.disable"}
; NVPTX-DISABLED1: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
; NVPTX-DISABLED1: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; NVPTX-DISABLED1: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; NVPTX-DISABLED1: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; NVPTX-DISABLED1: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.
; NVPTX-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
; NVPTX-DISABLED2: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
; NVPTX-DISABLED2: [[META13]] = !{!"int", [[META14:![0-9]+]], i64 0}
; NVPTX-DISABLED2: [[META14]] = !{!"omnipotent char", [[META15:![0-9]+]], i64 0}
; NVPTX-DISABLED2: [[META15]] = !{!"Simple C/C++ TBAA"}
; NVPTX-DISABLED2: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]], [[META18:![0-9]+]]}
; NVPTX-DISABLED2: [[META17]] = !{!"llvm.loop.mustprogress"}
; NVPTX-DISABLED2: [[META18]] = !{!"llvm.loop.unroll.disable"}
; NVPTX-DISABLED2: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]], [[META18]]}
; NVPTX-DISABLED2: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
; NVPTX-DISABLED2: [[META21]] = !{!"any pointer", [[META14]], i64 0}
; NVPTX-DISABLED2: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]}
; NVPTX-DISABLED2: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]}
;.