1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals 2; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=CHECK 3; RUN: opt -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefixes=CHECK-DISABLE-SPMDIZATION 4; 5; __local int G; 6; 7; void leaf() { 8; G = 42; 9; } 10; 11; void spmd_helper() { 12; leaf(); 13; #pragma omp parallel 14; leaf(); 15; } 16; void spmd() { 17; #pragma omp target 18; spmd_helper(); 19; } 20; 21; void generic_helper() { 22; leaf(); 23; unknown(); 24; } 25; void generic() { 26; #pragma omp target 27; generic_helper(); 28; } 29; 30target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" 31target triple = "nvptx64" 32 33%struct.ident_t = type { i32, i32, i32, i32, ptr } 34%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 } 35%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } 36 37@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 38@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 39@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 40@G = external addrspace(5) global i32, align 4 41@__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 42@__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 43 44;. 45; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 46; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 47; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 48; CHECK: @G = external addrspace(5) global i32, align 4 49; CHECK: @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 50; CHECK: @__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 51;. 52; CHECK-DISABLE-SPMDIZATION: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 53; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 54; CHECK-DISABLE-SPMDIZATION: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 55; CHECK-DISABLE-SPMDIZATION: @G = external addrspace(5) global i32, align 4 56; CHECK-DISABLE-SPMDIZATION: @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 57; CHECK-DISABLE-SPMDIZATION: @__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } 58; CHECK-DISABLE-SPMDIZATION: @__omp_outlined___wrapper.ID = private constant i8 undef 59;. 60define weak ptx_kernel void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) #0 { 61; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12 62; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { 63; CHECK-NEXT: entry: 64; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment, ptr [[DYN]]) 65; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 66; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 67; CHECK: user_code.entry: 68; CHECK-NEXT: call void @spmd_helper() #[[ATTR7:[0-9]+]] 69; CHECK-NEXT: call void @__kmpc_target_deinit() 70; CHECK-NEXT: ret void 71; CHECK: worker.exit: 72; CHECK-NEXT: ret void 73; 74; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12 75; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { 76; CHECK-DISABLE-SPMDIZATION-NEXT: entry: 77; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 78; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment, ptr [[DYN]]) 79; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 80; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 81; CHECK-DISABLE-SPMDIZATION: is_worker_check: 82; CHECK-DISABLE-SPMDIZATION-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 83; CHECK-DISABLE-SPMDIZATION-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 84; CHECK-DISABLE-SPMDIZATION-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 85; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 86; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 87; CHECK-DISABLE-SPMDIZATION: worker_state_machine.begin: 88; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 89; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 90; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 91; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 92; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 93; CHECK-DISABLE-SPMDIZATION: worker_state_machine.finished: 94; CHECK-DISABLE-SPMDIZATION-NEXT: ret void 95; CHECK-DISABLE-SPMDIZATION: worker_state_machine.is_active.check: 96; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 97; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.check: 98; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] 99; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.execute: 100; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP0]]) 101; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 102; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.check1: 103; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 104; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.end: 105; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_kernel_end_parallel() 106; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 107; CHECK-DISABLE-SPMDIZATION: worker_state_machine.done.barrier: 108; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 109; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 110; CHECK-DISABLE-SPMDIZATION: thread.user_code.check: 111; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 112; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 113; CHECK-DISABLE-SPMDIZATION: user_code.entry: 114; CHECK-DISABLE-SPMDIZATION-NEXT: call void @spmd_helper() #[[ATTR7:[0-9]+]] 115; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit() 116; CHECK-DISABLE-SPMDIZATION-NEXT: ret void 117; CHECK-DISABLE-SPMDIZATION: worker.exit: 118; CHECK-DISABLE-SPMDIZATION-NEXT: ret void 119; 120entry: 121 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment, ptr %dyn) 122 %exec_user_code = icmp eq i32 %0, -1 123 br i1 %exec_user_code, label %user_code.entry, label %worker.exit 124 125user_code.entry: ; preds = %entry 126 call void @spmd_helper() #5 127 call void @__kmpc_target_deinit() 128 ret void 129 130worker.exit: ; preds = %entry 131 ret void 132} 133 134; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. 135define weak i32 @__kmpc_target_init(ptr, ptr) { 136; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init 137; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { 138; CHECK-NEXT: ret i32 0 139; 140; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__kmpc_target_init 141; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { 142; CHECK-DISABLE-SPMDIZATION-NEXT: ret i32 0 143; 144 ret i32 0 145} 146 147declare void @__kmpc_target_deinit() 148 149; Function Attrs: convergent noinline norecurse nounwind 150define weak ptx_kernel void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #0 { 151; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20 152; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 153; CHECK-NEXT: entry: 154; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 155; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment, ptr [[DYN]]) 156; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 157; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 158; CHECK: is_worker_check: 159; CHECK-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 160; CHECK-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 161; CHECK-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 162; CHECK-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 163; CHECK-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 164; CHECK: worker_state_machine.begin: 165; CHECK-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 166; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 167; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 168; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 169; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 170; CHECK: worker_state_machine.finished: 171; CHECK-NEXT: ret void 172; CHECK: worker_state_machine.is_active.check: 173; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 174; CHECK: worker_state_machine.parallel_region.fallback.execute: 175; CHECK-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 176; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 177; CHECK: worker_state_machine.parallel_region.end: 178; CHECK-NEXT: call void @__kmpc_kernel_end_parallel() 179; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 180; CHECK: worker_state_machine.done.barrier: 181; CHECK-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 182; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 183; CHECK: thread.user_code.check: 184; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 185; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 186; CHECK: user_code.entry: 187; CHECK-NEXT: call void @generic_helper() #[[ATTR7]] 188; CHECK-NEXT: call void @__kmpc_target_deinit() 189; CHECK-NEXT: ret void 190; CHECK: worker.exit: 191; CHECK-NEXT: ret void 192; 193; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20 194; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { 195; CHECK-DISABLE-SPMDIZATION-NEXT: entry: 196; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 197; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment, ptr [[DYN]]) 198; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 199; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 200; CHECK-DISABLE-SPMDIZATION: is_worker_check: 201; CHECK-DISABLE-SPMDIZATION-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 202; CHECK-DISABLE-SPMDIZATION-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 203; CHECK-DISABLE-SPMDIZATION-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 204; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 205; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 206; CHECK-DISABLE-SPMDIZATION: worker_state_machine.begin: 207; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 208; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) 209; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 210; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null 211; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 212; CHECK-DISABLE-SPMDIZATION: worker_state_machine.finished: 213; CHECK-DISABLE-SPMDIZATION-NEXT: ret void 214; CHECK-DISABLE-SPMDIZATION: worker_state_machine.is_active.check: 215; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 216; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.fallback.execute: 217; CHECK-DISABLE-SPMDIZATION-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]]) 218; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 219; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.end: 220; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_kernel_end_parallel() 221; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 222; CHECK-DISABLE-SPMDIZATION: worker_state_machine.done.barrier: 223; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) 224; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 225; CHECK-DISABLE-SPMDIZATION: thread.user_code.check: 226; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 227; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 228; CHECK-DISABLE-SPMDIZATION: user_code.entry: 229; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR7]] 230; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit() 231; CHECK-DISABLE-SPMDIZATION-NEXT: ret void 232; CHECK-DISABLE-SPMDIZATION: worker.exit: 233; CHECK-DISABLE-SPMDIZATION-NEXT: ret void 234; 235entry: 236 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment, ptr %dyn) 237 %exec_user_code = icmp eq i32 %0, -1 238 br i1 %exec_user_code, label %user_code.entry, label %worker.exit 239 240user_code.entry: ; preds = %entry 241 call void @generic_helper() #5 242 call void @__kmpc_target_deinit() 243 ret void 244 245worker.exit: ; preds = %entry 246 ret void 247} 248 249; Function Attrs: convergent noinline nounwind 250define internal void @spmd_helper() #1 { 251; CHECK-LABEL: define {{[^@]+}}@spmd_helper 252; CHECK-SAME: () #[[ATTR1:[0-9]+]] { 253; CHECK-NEXT: entry: 254; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 255; CHECK-NEXT: call void @leaf() #[[ATTR8:[0-9]+]] 256; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4:[0-9]+]] 257; CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 258; CHECK-NEXT: ret void 259; 260; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@spmd_helper 261; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1:[0-9]+]] { 262; CHECK-DISABLE-SPMDIZATION-NEXT: entry: 263; CHECK-DISABLE-SPMDIZATION-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 264; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR8:[0-9]+]] 265; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4:[0-9]+]] 266; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) 267; CHECK-DISABLE-SPMDIZATION-NEXT: ret void 268; 269entry: 270 %captured_vars_addrs = alloca [0 x ptr], align 8 271 call void @leaf() 272 %0 = call i32 @__kmpc_global_thread_num(ptr @2) 273 call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0) 274 ret void 275} 276 277; Function Attrs: convergent noinline norecurse nounwind 278define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { 279; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ 280; CHECK-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { 281; CHECK-NEXT: entry: 282; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 283; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 284; CHECK-NEXT: call void @leaf() #[[ATTR8]] 285; CHECK-NEXT: ret void 286; 287; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined__ 288; CHECK-DISABLE-SPMDIZATION-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { 289; CHECK-DISABLE-SPMDIZATION-NEXT: entry: 290; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 291; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 292; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR8]] 293; CHECK-DISABLE-SPMDIZATION-NEXT: ret void 294; 295entry: 296 %.global_tid..addr = alloca ptr, align 8 297 %.bound_tid..addr = alloca ptr, align 8 298 store ptr %.global_tid., ptr %.global_tid..addr, align 8 299 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 300 call void @leaf() 301 ret void 302} 303 304; Function Attrs: convergent noinline norecurse nounwind 305define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 { 306; CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper 307; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { 308; CHECK-NEXT: entry: 309; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 310; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 311; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 312; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 313; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 314; CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR8]] 315; CHECK-NEXT: ret void 316; 317; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined___wrapper 318; CHECK-DISABLE-SPMDIZATION-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { 319; CHECK-DISABLE-SPMDIZATION-NEXT: entry: 320; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 321; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 322; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 323; CHECK-DISABLE-SPMDIZATION-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 324; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 325; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR8]] 326; CHECK-DISABLE-SPMDIZATION-NEXT: ret void 327; 328entry: 329 %.addr = alloca i16, align 2 330 %.addr1 = alloca i32, align 4 331 %.zero.addr = alloca i32, align 4 332 %global_args = alloca ptr, align 8 333 store i16 %0, ptr %.addr, align 2 334 store i32 %1, ptr %.addr1, align 4 335 store i32 0, ptr %.zero.addr, align 4 336 call void @__kmpc_get_shared_variables(ptr %global_args) 337 call void @__omp_outlined__(ptr %.addr1, ptr %.zero.addr) #3 338 ret void 339} 340 341declare void @__kmpc_get_shared_variables(ptr) 342 343; Function Attrs: nounwind 344declare i32 @__kmpc_global_thread_num(ptr) #3 345 346; Function Attrs: alwaysinline 347declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #4 348 349declare void @unknown() 350 351; Function Attrs: convergent noinline nounwind 352define internal void @leaf() #1 { 353; CHECK-LABEL: define {{[^@]+}}@leaf 354; CHECK-SAME: () #[[ATTR6:[0-9]+]] { 355; CHECK-NEXT: entry: 356; CHECK-NEXT: store i32 42, ptr addrspace(5) @G, align 4 357; CHECK-NEXT: ret void 358; 359; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@leaf 360; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR6:[0-9]+]] { 361; CHECK-DISABLE-SPMDIZATION-NEXT: entry: 362; CHECK-DISABLE-SPMDIZATION-NEXT: store i32 42, ptr addrspace(5) @G, align 4 363; CHECK-DISABLE-SPMDIZATION-NEXT: ret void 364; 365entry: 366 store i32 42, ptr addrspace(5) @G, align 4 367 ret void 368} 369 370; Function Attrs: convergent noinline nounwind 371define internal void @generic_helper() #1 { 372; CHECK-LABEL: define {{[^@]+}}@generic_helper 373; CHECK-SAME: () #[[ATTR1]] { 374; CHECK-NEXT: entry: 375; CHECK-NEXT: call void @unknown() 376; CHECK-NEXT: call void @leaf() #[[ATTR8]] 377; CHECK-NEXT: ret void 378; 379; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@generic_helper 380; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1]] { 381; CHECK-DISABLE-SPMDIZATION-NEXT: entry: 382; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown() 383; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR8]] 384; CHECK-DISABLE-SPMDIZATION-NEXT: ret void 385; 386entry: 387 call void @unknown() 388 call void @leaf() 389 ret void 390} 391 392attributes #0 = { convergent noinline norecurse nounwind "kernel" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 393attributes #1 = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 394attributes #2 = { convergent noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 395attributes #3 = { nounwind } 396attributes #4 = { alwaysinline } 397attributes #5 = { convergent } 398 399!omp_offload.info = !{!0, !1} 400!llvm.module.flags = !{!4, !5, !6, !7, !8} 401!llvm.ident = !{!9} 402 403!0 = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} 404!1 = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} 405!4 = !{i32 1, !"wchar_size", i32 4} 406!5 = !{i32 7, !"openmp", i32 50} 407!6 = !{i32 7, !"openmp-device", i32 50} 408!7 = !{i32 8, !"PIC Level", i32 2} 409!8 = !{i32 7, !"frame-pointer", i32 2} 410!9 = !{!"clang version 14.0.0"} 411;. 412; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 413; CHECK: attributes #[[ATTR1]] = { noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 414; CHECK: attributes #[[ATTR2]] = { norecurse nosync memory(write) } 415; CHECK: attributes #[[ATTR3]] = { noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 416; CHECK: attributes #[[ATTR4]] = { nounwind } 417; CHECK: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } 418; CHECK: attributes #[[ATTR6]] = { noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 419; CHECK: attributes #[[ATTR7]] = { convergent nounwind } 420; CHECK: attributes #[[ATTR8]] = { nosync nounwind memory(write) } 421;. 422; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 423; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR1]] = { noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 424; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR2]] = { norecurse nosync memory(write) } 425; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR3]] = { noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 426; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4]] = { nounwind } 427; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } 428; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR6]] = { noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 429; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR7]] = { convergent nounwind } 430; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR8]] = { nosync nounwind memory(write) } 431;. 432; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} 433; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} 434; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} 435; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} 436; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} 437; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} 438; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} 439; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} 440;. 441; CHECK-DISABLE-SPMDIZATION: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} 442; CHECK-DISABLE-SPMDIZATION: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} 443; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} 444; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} 445; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} 446; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} 447; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} 448; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} 449;. 450