1; RUN: opt -S -passes=openmp-opt -openmp-ir-builder-optimistic-attributes -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s 2; RUN: opt -S -passes=openmp-opt -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s 3 4; C input used for this test: 5 6; void bar(void) { 7; #pragma omp parallel 8; { } 9; } 10; void foo(void) { 11; #pragma omp target teams 12; { 13; #pragma omp parallel 14; {} 15; bar(); 16; unknown(); 17; #pragma omp parallel 18; {} 19; } 20; } 21 22; Verify we replace the function pointer uses for the first and last outlined 23; region (1 and 3) but not for the middle one (2) because it could be called from 24; another kernel. 25 26; CHECK-DAG: @__omp_outlined__1_wrapper.ID = private constant i8 undef 27; CHECK-DAG: @__omp_outlined__2_wrapper.ID = private constant i8 undef 28 29; CHECK-DAG: icmp eq ptr %worker.work_fn, @__omp_outlined__1_wrapper.ID 30; CHECK-DAG: icmp eq ptr %worker.work_fn, @__omp_outlined__2_wrapper.ID 31 32 33; CHECK-DAG: call void @__kmpc_parallel_51(ptr @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr %{{.*}}, i64 0) 34; CHECK-DAG: call void @__kmpc_parallel_51(ptr @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr %{{.*}}, i64 0) 35; CHECK-DAG: call void @__kmpc_parallel_51(ptr @2, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %{{.*}}, i64 0) 36 37 38%struct.ident_t = type { i32, i32, i32, i32, ptr } 39%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } 40%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 } 41 42@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 43@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 44@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 45@__omp_offloading_10301_87b2c_foo_l7_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 46 47define weak ptx_kernel void @__omp_offloading_10301_87b2c_foo_l7() "kernel" { 48entry: 49 %.zero.addr = alloca i32, align 4 50 %.threadid_temp. = alloca i32, align 4 51 store i32 0, ptr %.zero.addr, align 4 52 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_10301_87b2c_foo_l7_kernel_environment, ptr null) 53 %exec_user_code = icmp eq i32 %0, -1 54 br i1 %exec_user_code, label %user_code.entry, label %worker.exit 55 56user_code.entry: ; preds = %entry 57 %1 = call i32 @__kmpc_global_thread_num(ptr @1) 58 store i32 %1, ptr %.threadid_temp., align 4 59 call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) 60 call void @__kmpc_target_deinit() 61 ret void 62 63worker.exit: ; preds = %entry 64 ret void 65} 66 67define weak i32 @__kmpc_target_init(ptr %0, ptr) { 68 ret i32 0 69} 70 71declare void @unknown() 72 73define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { 74entry: 75 %.global_tid..addr = alloca ptr, align 8 76 %.bound_tid..addr = alloca ptr, align 8 77 %captured_vars_addrs = alloca [0 x ptr], align 8 78 %captured_vars_addrs1 = alloca [0 x ptr], align 8 79 store ptr %.global_tid., ptr %.global_tid..addr, align 8 80 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 81 %0 = load ptr, ptr %.global_tid..addr, align 8 82 %1 = load i32, ptr %0, align 4 83 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs, i64 0) 84 call void @bar() 85 call void @unknown() 86 call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs1, i64 0) 87 ret void 88} 89 90define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { 91entry: 92 %.global_tid..addr = alloca ptr, align 8 93 %.bound_tid..addr = alloca ptr, align 8 94 store ptr %.global_tid., ptr %.global_tid..addr, align 8 95 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 96 ret void 97} 98 99define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) { 100entry: 101 %.addr = alloca i16, align 2 102 %.addr1 = alloca i32, align 4 103 %.zero.addr = alloca i32, align 4 104 %global_args = alloca ptr, align 8 105 store i32 0, ptr %.zero.addr, align 4 106 store i16 %0, ptr %.addr, align 2 107 store i32 %1, ptr %.addr1, align 4 108 call void @__kmpc_get_shared_variables(ptr %global_args) 109 call void @__omp_outlined__1(ptr %.addr1, ptr %.zero.addr) 110 ret void 111} 112 113declare void @__kmpc_get_shared_variables(ptr) 114 115declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) 116 117define hidden void @bar() { 118entry: 119 %captured_vars_addrs = alloca [0 x ptr], align 8 120 %0 = call i32 @__kmpc_global_thread_num(ptr @2) 121 call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs, i64 0) 122 ret void 123} 124 125define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { 126entry: 127 %.global_tid..addr = alloca ptr, align 8 128 %.bound_tid..addr = alloca ptr, align 8 129 store ptr %.global_tid., ptr %.global_tid..addr, align 8 130 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 131 ret void 132} 133 134define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) { 135entry: 136 %.addr = alloca i16, align 2 137 %.addr1 = alloca i32, align 4 138 %.zero.addr = alloca i32, align 4 139 %global_args = alloca ptr, align 8 140 store i32 0, ptr %.zero.addr, align 4 141 store i16 %0, ptr %.addr, align 2 142 store i32 %1, ptr %.addr1, align 4 143 call void @__kmpc_get_shared_variables(ptr %global_args) 144 call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr) 145 ret void 146} 147 148declare i32 @__kmpc_global_thread_num(ptr) 149 150declare void @__kmpc_target_deinit() 151 152define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { 153entry: 154 %.global_tid..addr = alloca ptr, align 8 155 %.bound_tid..addr = alloca ptr, align 8 156 store ptr %.global_tid., ptr %.global_tid..addr, align 8 157 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 158 ret void 159} 160 161define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) { 162entry: 163 %.addr = alloca i16, align 2 164 %.addr1 = alloca i32, align 4 165 %.zero.addr = alloca i32, align 4 166 %global_args = alloca ptr, align 8 167 store i32 0, ptr %.zero.addr, align 4 168 store i16 %0, ptr %.addr, align 2 169 store i32 %1, ptr %.addr1, align 4 170 call void @__kmpc_get_shared_variables(ptr %global_args) 171 call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) 172 ret void 173} 174 175!omp_offload.info = !{!0} 176!llvm.module.flags = !{!2, !3} 177 178!0 = !{i32 0, i32 66305, i32 555956, !"foo", i32 7, i32 0} 179!2 = !{i32 7, !"openmp", i32 50} 180!3 = !{i32 7, !"openmp-device", i32 50} 181