; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
;
; Runs openmp-opt over three device kernels that reach helpers calling
; @__kmpc_get_hardware_num_threads_in_block. The kernels carry
; "omp_target_thread_limit"="666", and the expected output replaces the
; helpers' use of that call's result with the constant 666.
target triple = "nvptx64"

; Layout of the kernel environment record and its embedded configuration.
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }

; Global the helpers store the thread limit into, which makes the value
; observable in the expected output.
@G = external global i32

; One environment record per kernel, passed as the first argument of
; @__kmpc_target_init. In the input the third i8 is 1 and every i32 is 0; the
; expected output below has the third i8 as 3 and the values 666 and 777 in the
; fifth and seventh configuration fields.
; NOTE(review): presumably those are the execution-mode, max-threads and
; max-teams slots - confirm against the device runtime's struct definition.
@kernel0_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
@kernel1_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
@kernel2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }

;.
; CHECK: @G = external global i32
; CHECK: @kernel0_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 666, i32 0, i32 777, i32 0, i32 0 }, ptr null, ptr null }
; CHECK: @kernel1_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 666, i32 0, i32 777, i32 0, i32 0 }, ptr null, ptr null }
; CHECK: @kernel2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 666, i32 0, i32 777, i32 0, i32 0 }, ptr null, ptr null }
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
;.
; Kernel that calls all three helpers unconditionally after
; @__kmpc_target_init; the input does not branch on the init result.
define weak ptx_kernel void @kernel0(ptr %dyn) "kernel" #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment, ptr [[DYN]])
; CHECK-NEXT:    [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT:    [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT:    br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
; CHECK:       exit.threads:
; CHECK-NEXT:    ret void
; CHECK:       main.thread.user_code:
; CHECK-NEXT:    call void @helper0() #[[ATTR1:[0-9]+]]
; CHECK-NEXT:    call void @helper1() #[[ATTR1]]
; CHECK-NEXT:    call void @helper2() #[[ATTR1]]
; CHECK-NEXT:    call void @__kmpc_target_deinit()
; CHECK-NEXT:    ret void
;
  %i = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment, ptr %dyn)
  call void @helper0()
  call void @helper1()
  call void @helper2()
  call void @__kmpc_target_deinit()
  ret void
}

; Kernel that reaches only @helper1 (which in turn references @helper0 on its
; %t edge).
define weak ptx_kernel void @kernel1(ptr %dyn) "kernel" #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment, ptr [[DYN]])
; CHECK-NEXT:    [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT:    [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT:    br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
; CHECK:       exit.threads:
; CHECK-NEXT:    ret void
; CHECK:       main.thread.user_code:
; CHECK-NEXT:    call void @helper1() #[[ATTR1]]
; CHECK-NEXT:    call void @__kmpc_target_deinit()
; CHECK-NEXT:    ret void
;
  %i = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment, ptr %dyn)
  call void @helper1()
  call void @__kmpc_target_deinit()
  ret void
}

; Kernel that branches on the @__kmpc_target_init result (-1 selects the user
; code path) and, besides the three helpers, launches a parallel region through
; @__kmpc_parallel_51 with zero captured variables.
define weak ptx_kernel void @kernel2(ptr %dyn) "kernel" #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment, ptr [[DYN]])
; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK:       common.ret:
; CHECK-NEXT:    ret void
; CHECK:       user_code.entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr null) #[[ATTR1]]
; CHECK-NEXT:    call void @helper0() #[[ATTR1]]
; CHECK-NEXT:    call void @helper1() #[[ATTR1]]
; CHECK-NEXT:    call void @helper2() #[[ATTR1]]
; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-NEXT:    call void @__kmpc_target_deinit()
; CHECK-NEXT:    ret void
;
entry:
  %captured_vars_addrs = alloca [0 x ptr], align 8
  %i = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment, ptr %dyn)
  %exec_user_code = icmp eq i32 %i, -1
  br i1 %exec_user_code, label %user_code.entry, label %common.ret

common.ret:
  ret void

user_code.entry:
  %0 = call i32 @__kmpc_global_thread_num(ptr null)
  call void @helper0()
  call void @helper1()
  call void @helper2()
  call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0)
  call void @__kmpc_target_deinit()
  ret void
}

; Stores the queried thread limit into @G. The expected output stores the
; constant 666 instead, inside a region executed only by hardware thread 0 and
; followed by a barrier.
define internal void @helper0() {
; CHECK-LABEL: define {{[^@]+}}@helper0
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT:    br label [[REGION_CHECK_TID:%.*]]
; CHECK:       region.check.tid:
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK:       region.guarded:
; CHECK-NEXT:    store i32 666, ptr @G, align 4
; CHECK-NEXT:    br label [[REGION_GUARDED_END:%.*]]
; CHECK:       region.guarded.end:
; CHECK-NEXT:    br label [[REGION_BARRIER]]
; CHECK:       region.barrier:
; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB1]], i32 [[TMP1]])
; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
; CHECK:       region.exit:
; CHECK-NEXT:    ret void
;
  %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
  store i32 %threadLimit, ptr @G
  ret void
}

; Branches on whether the queried thread limit equals 666. The expected output
; jumps straight to %f and leaves %t (the only caller of @helper0 from here)
; unreachable.
define internal void @helper1() {
; CHECK-LABEL: define {{[^@]+}}@helper1
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT:    br label [[F:%.*]]
; CHECK:       t:
; CHECK-NEXT:    unreachable
; CHECK:       f:
; CHECK-NEXT:    ret void
;
  %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
  %c = icmp eq i32 %threadLimit, 666
  br i1 %c, label %f, label %t
t:
  call void @helper0()
  ret void
f:
  ret void
}

; Same body as @helper0: stores the queried thread limit into @G, with the same
; expected rewrite to a guarded store of 666.
define internal void @helper2() {
; CHECK-LABEL: define {{[^@]+}}@helper2
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT:    br label [[REGION_CHECK_TID:%.*]]
; CHECK:       region.check.tid:
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK:       region.guarded:
; CHECK-NEXT:    store i32 666, ptr @G, align 4
; CHECK-NEXT:    br label [[REGION_GUARDED_END:%.*]]
; CHECK:       region.guarded.end:
; CHECK-NEXT:    br label [[REGION_BARRIER]]
; CHECK:       region.barrier:
; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB1]], i32 [[TMP1]])
; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
; CHECK:       region.exit:
; CHECK-NEXT:    ret void
;
  %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
  store i32 %threadLimit, ptr @G
  ret void
}

; Empty outlined body handed to @__kmpc_parallel_51 by @kernel2.
define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
; CHECK-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret void
;
entry:
  ret void
}

; Empty wrapper handed to @__kmpc_parallel_51 alongside @__omp_outlined__.
define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) {
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret void
;
entry:
  ret void
}

; Local definition of the thread-count query used by the helpers; it simply
; forwards to an external dummy declaration.
define internal i32 @__kmpc_get_hardware_num_threads_in_block() {
  %ret = call i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
  ret i32 %ret
}
; NOTE(review): attribute group #1 is referenced on the two declarations below,
; but no "attributes #1" definition appears in this file - confirm that is
; intentional.
declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
declare i32 @__kmpc_target_init(ptr, ptr) #1
declare void @__kmpc_target_deinit() #1
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
declare i32 @__kmpc_global_thread_num(ptr)


!llvm.module.flags = !{!0, !1}

; Kernel attributes supplying the thread limit (666) and team count (777) that
; show up as constants in the expected output.
attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"}

!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}
;.
; CHECK: attributes #[[ATTR0]] = { "kernel" "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
; CHECK: attributes #[[ATTR1]] = { nounwind }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { alwaysinline }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.