; See ./README.md for how to maintain the LLVM IR in this test.

; REQUIRES: nvptx-registered-target

; RUN: opt -pass-remarks=kernel-info -passes=kernel-info \
; RUN:     -disable-output %s 2>&1 | \
; RUN:   FileCheck -match-full-lines %s

; CHECK-NOT: remark:
; CHECK: remark: test.c:0:0: in artificial function '[[OFF_FUNC:__omp_offloading_[a-f0-9_]*_h_l12]]_debug__', artificial alloca ('%[[#]]') for 'dyn_ptr' with static size of 8 bytes
; CHECK-NEXT: remark: test.c:14:9: in artificial function '[[OFF_FUNC]]_debug__', alloca ('%[[#]]') for 'i' with static size of 4 bytes
; CHECK-NEXT: remark: test.c:15:9: in artificial function '[[OFF_FUNC]]_debug__', alloca ('%[[#]]') for 'a' with static size of 8 bytes
; CHECK-NEXT: remark: <unknown>:0:0: in artificial function '[[OFF_FUNC]]_debug__', 'store' instruction accesses memory in flat address space
; CHECK-NEXT: remark: test.c:13:3: in artificial function '[[OFF_FUNC]]_debug__', direct call to defined function, callee is '@__kmpc_target_init'
; CHECK-NEXT: remark: test.c:16:5: in artificial function '[[OFF_FUNC]]_debug__', direct call, callee is '@f'
; CHECK-NEXT: remark: test.c:17:5: in artificial function '[[OFF_FUNC]]_debug__', direct call to defined function, callee is 'g'
; CHECK-NEXT: remark: test.c:18:3: in artificial function '[[OFF_FUNC]]_debug__', direct call to defined function, callee is '@__kmpc_target_deinit'
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', ExternalNotKernel = 0
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', Allocas = 3
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', AllocasStaticSizeSum = 20
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', AllocasDyn = 0
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', DirectCalls = 4
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', IndirectCalls = 0
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', DirectCallsToDefinedFunctions = 3
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', InlineAssemblyCalls = 0
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', Invokes = 0
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', FlatAddrspaceAccesses = 1

; CHECK-NEXT: remark: test.c:0:0: in artificial function '[[OFF_FUNC]]', artificial alloca ('%[[#]]') for 'dyn_ptr' with static size of 8 bytes
; CHECK-NEXT: remark: <unknown>:0:0: in artificial function '[[OFF_FUNC]]', 'store' instruction accesses memory in flat address space
; CHECK-NEXT: remark: test.c:12:1: in artificial function '[[OFF_FUNC]]', 'load' instruction ('%[[#]]') accesses memory in flat address space
; CHECK-NEXT: remark: test.c:12:1: in artificial function '[[OFF_FUNC]]', direct call to defined function, callee is artificial '[[OFF_FUNC]]_debug__'
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', ExternalNotKernel = 0
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', omp_target_thread_limit = 128
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', maxntidx = 128
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', Allocas = 1
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', AllocasStaticSizeSum = 8
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', AllocasDyn = 0
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', DirectCalls = 1
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', IndirectCalls = 0
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', DirectCallsToDefinedFunctions = 1
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', InlineAssemblyCalls = 0
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', Invokes = 0
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', FlatAddrspaceAccesses = 2

; CHECK-NEXT: remark: test.c:4:7: in function 'g', alloca ('%[[#]]') for 'i' with static size of 4 bytes
; CHECK-NEXT: remark: test.c:5:7: in function 'g', alloca ('%[[#]]') for 'a' with static size of 8 bytes
; CHECK-NEXT: remark: test.c:6:3: in function 'g', direct call, callee is '@f'
; CHECK-NEXT: remark: test.c:7:3: in function 'g', direct call to defined function, callee is 'g'
; CHECK-NEXT: remark: test.c:3:0: in function 'g', ExternalNotKernel = 1
; CHECK-NEXT: remark: test.c:3:0: in function 'g', Allocas = 2
; CHECK-NEXT: remark: test.c:3:0: in function 'g', AllocasStaticSizeSum = 12
; CHECK-NEXT: remark: test.c:3:0: in function 'g', AllocasDyn = 0
; CHECK-NEXT: remark: test.c:3:0: in function 'g', DirectCalls = 2
; CHECK-NEXT: remark: test.c:3:0: in function 'g', IndirectCalls = 0
; CHECK-NEXT: remark: test.c:3:0: in function 'g', DirectCallsToDefinedFunctions = 1
; CHECK-NEXT: remark: test.c:3:0: in function 'g', InlineAssemblyCalls = 0
; CHECK-NEXT: remark: test.c:3:0: in function 'g', Invokes = 0
; CHECK-NEXT: remark: test.c:3:0: in function 'g', FlatAddrspaceAccesses = 0
; CHECK-NOT: remark: {{.*: in function 'g',.*}}

; A lot of internal functions (e.g., __kmpc_target_init) come next, but we don't
; want to maintain a list of their allocas, calls, etc. in this test.

; ModuleID = 'test-openmp-nvptx64-nvidia-cuda-sm_70.bc'
source_filename = "test.c"
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

; Named struct types referenced by the global variables of this module.
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.DynamicEnvironmentTy = type { i16 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
%struct.DeviceMemoryPoolTy = type { ptr, i64 }
%struct.DeviceMemoryPoolTrackingTy = type { i64, i64, i64, i64 }
%struct.DeviceEnvironmentTy = type { i32, i32, i32, i32, i64, i64, i64, i64 }
%"struct.rpc::Client" = type { %"struct.rpc::Process" }
%"struct.rpc::Process" = type { i32, ptr, ptr, ptr, ptr, [128 x i32] }
%"struct.(anonymous namespace)::SharedMemorySmartStackTy" = type { [512 x i8], [1024 x i8] }
%"struct.ompx::state::TeamStateTy" = type { %"struct.ompx::state::ICVStateTy", i32, i32, ptr }
%"struct.ompx::state::ICVStateTy" = type { i32, i32, i32, i32, i32, i32, i32 }

@__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
@__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0

; Source-location string and the ident_t record that points at it. The i32 57
; in @1 equals the length of the string in @0 without its trailing NUL byte.
@0 = private unnamed_addr constant [58 x i8] c";test.c;__omp_offloading_fd02_1116d6_h_l12_debug__;13;3;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 57, ptr @0 }, align 8

; Per-kernel environment objects. The kernel environment is the first argument
; passed to @__kmpc_target_init by the _debug__ function defined later in this
; file, and it links to both @1 and the dynamic environment.
@__omp_offloading_fd02_1116d6_h_l12_dynamic_environment = weak_odr protected global %struct.DynamicEnvironmentTy zeroinitializer
@__omp_offloading_fd02_1116d6_h_l12_kernel_environment = weak_odr protected constant %struct.KernelEnvironmentTy {
    %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 1, i32 128, i32 -1, i32 -1, i32 0, i32 0 },
    ptr @1,
    ptr @__omp_offloading_fd02_1116d6_h_l12_dynamic_environment }

; Four globals are listed in @llvm.used; the addrspace(4) device environment is
; cast to the generic address space so that it fits the [4 x ptr] array.
@llvm.used = appending global [4 x ptr] [
    ptr @__llvm_rpc_client,
    ptr addrspacecast (ptr addrspace(4) @__omp_rtl_device_environment to ptr),
    ptr @__omp_rtl_device_memory_pool,
    ptr @__omp_rtl_device_memory_pool_tracker], section "llvm.metadata"
@__omp_rtl_device_memory_pool = weak protected global %struct.DeviceMemoryPoolTy zeroinitializer, align 8
@__omp_rtl_device_memory_pool_tracker = weak protected global %struct.DeviceMemoryPoolTrackingTy zeroinitializer, align 8
@__omp_rtl_debug_kind = weak_odr hidden constant i32 0
@__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
@__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
@__omp_rtl_device_environment = weak protected addrspace(4) global %struct.DeviceEnvironmentTy undef, align 8

; Format strings handed to the printf wrapper inside @__assert_fail_internal:
; @.str is used when the second argument is non-null, @.str1 when it is null.
@.str = private unnamed_addr constant [40 x i8] c"%s:%u: %s: Assertion %s (`%s`) failed.\0A\00", align 1
@.str1 = private unnamed_addr constant [35 x i8] c"%s:%u: %s: Assertion `%s` failed.\0A\00", align 1

; File names, function signatures and condition text used as assertion
; arguments, interleaved with a few runtime state globals.
@.str15 = private unnamed_addr constant [43 x i8] c"/tmp/llvm/offload/DeviceRTL/src/Kernel.cpp\00", align 1
@__PRETTY_FUNCTION__._ZL19genericStateMachineP7IdentTy = private unnamed_addr constant [36 x i8] c"void genericStateMachine(IdentTy *)\00", align 1
@.str2 = private unnamed_addr constant [18 x i8] c"WorkFn == nullptr\00", align 1
@__PRETTY_FUNCTION__.__kmpc_target_deinit = private unnamed_addr constant [28 x i8] c"void __kmpc_target_deinit()\00", align 1
@IsSPMDMode = internal local_unnamed_addr addrspace(3) global i32 undef, align 4
@__llvm_rpc_client = weak protected global %"struct.rpc::Client" zeroinitializer, align 8
@.str1027 = private unnamed_addr constant [48 x i8] c"/tmp/llvm/offload/DeviceRTL/src/Parallelism.cpp\00", align 1
@.str12 = private unnamed_addr constant [23 x i8] c"!mapping::isSPMDMode()\00", align 1
@__PRETTY_FUNCTION__.__kmpc_kernel_end_parallel = private unnamed_addr constant [34 x i8] c"void __kmpc_kernel_end_parallel()\00", align 1
@_ZL20KernelEnvironmentPtr = internal unnamed_addr addrspace(3) global ptr undef, align 8
108@_ZL26KernelLaunchEnvironmentPtr = internal unnamed_addr addrspace(3) global ptr undef, align 8 109@_ZN12_GLOBAL__N_122SharedMemorySmartStackE = internal addrspace(3) global %"struct.(anonymous namespace)::SharedMemorySmartStackTy" undef, align 16 110@.str444 = private unnamed_addr constant [42 x i8] c"/tmp/llvm/offload/DeviceRTL/src/State.cpp\00", align 1 111@.str747 = private unnamed_addr constant [33 x i8] c"NThreadsVar == Other.NThreadsVar\00", align 1 112@__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_ = private unnamed_addr constant [68 x i8] c"void ompx::state::ICVStateTy::assertEqual(const ICVStateTy &) const\00", align 1 113@.str848 = private unnamed_addr constant [27 x i8] c"LevelVar == Other.LevelVar\00", align 1 114@.str949 = private unnamed_addr constant [39 x i8] c"ActiveLevelVar == Other.ActiveLevelVar\00", align 1 115@.str1050 = private unnamed_addr constant [47 x i8] c"MaxActiveLevelsVar == Other.MaxActiveLevelsVar\00", align 1 116@.str1151 = private unnamed_addr constant [33 x i8] c"RunSchedVar == Other.RunSchedVar\00", align 1 117@.str1252 = private unnamed_addr constant [43 x i8] c"RunSchedChunkVar == Other.RunSchedChunkVar\00", align 1 118@.str13 = private unnamed_addr constant [43 x i8] c"ParallelTeamSize == Other.ParallelTeamSize\00", align 1 119@__PRETTY_FUNCTION__._ZNK4ompx5state11TeamStateTy11assertEqualERS1_ = private unnamed_addr constant [64 x i8] c"void ompx::state::TeamStateTy::assertEqual(TeamStateTy &) const\00", align 1 120@.str14 = private unnamed_addr constant [39 x i8] c"HasThreadState == Other.HasThreadState\00", align 1 121@.str23 = private unnamed_addr constant [32 x i8] c"mapping::isSPMDMode() == IsSPMD\00", align 1 122@__PRETTY_FUNCTION__._ZN4ompx5state18assumeInitialStateEb = private unnamed_addr constant [43 x i8] c"void ompx::state::assumeInitialState(bool)\00", align 1 123@_ZL9ThreadDST = internal unnamed_addr addrspace(3) global ptr undef, align 8 124@_ZN4ompx5state9TeamStateE = internal 
local_unnamed_addr addrspace(3) global %"struct.ompx::state::TeamStateTy" undef, align 8 125@_ZN4ompx5state12ThreadStatesE = internal addrspace(3) global ptr undef, align 8 126 127; Function Attrs: convergent noinline norecurse nounwind optnone 128define internal void @__omp_offloading_fd02_1116d6_h_l12_debug__(ptr noalias noundef %0) #0 !dbg !18 { 129 %2 = alloca ptr, align 8 130 %3 = alloca i32, align 4 131 %4 = alloca [2 x i32], align 4 132 store ptr %0, ptr %2, align 8 133 #dbg_declare(ptr %2, !25, !DIExpression(), !26) 134 %5 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_1116d6_h_l12_kernel_environment, ptr %0), !dbg !27 135 %6 = icmp eq i32 %5, -1, !dbg !27 136 br i1 %6, label %7, label %8, !dbg !27 137 1387: ; preds = %1 139 #dbg_declare(ptr %3, !28, !DIExpression(), !31) 140 #dbg_declare(ptr %4, !32, !DIExpression(), !36) 141 call void @f() #19, !dbg !37 142 call void @g() #19, !dbg !38 143 call void @__kmpc_target_deinit(), !dbg !39 144 ret void, !dbg !40 145 1468: ; preds = %1 147 ret void, !dbg !27 148} 149 150; Function Attrs: convergent mustprogress noinline norecurse nounwind optnone 151define weak_odr protected ptx_kernel void @__omp_offloading_fd02_1116d6_h_l12(ptr noalias noundef %0) #1 !dbg !41 { 152 %2 = alloca ptr, align 8 153 store ptr %0, ptr %2, align 8 154 #dbg_declare(ptr %2, !42, !DIExpression(), !43) 155 %3 = load ptr, ptr %2, align 8, !dbg !44 156 call void @__omp_offloading_fd02_1116d6_h_l12_debug__(ptr %3) #20, !dbg !44 157 ret void, !dbg !44 158} 159 160; Function Attrs: convergent 161declare void @f(...) 
#2 162 163; Function Attrs: convergent noinline nounwind optnone 164define hidden void @g() #3 !dbg !45 { 165 %1 = alloca i32, align 4 166 %2 = alloca [2 x i32], align 4 167 #dbg_declare(ptr %1, !48, !DIExpression(), !49) 168 #dbg_declare(ptr %2, !50, !DIExpression(), !51) 169 call void @f() #19, !dbg !52 170 call void @g() #19, !dbg !53 171 ret void, !dbg !54 172} 173 174; Function Attrs: convergent mustprogress nounwind 175define internal noundef range(i32 -1, 1024) i32 @__kmpc_target_init(ptr nofree noundef nonnull align 8 dereferenceable(48) %0, ptr nofree noundef nonnull align 8 dereferenceable(16) %1) #4 { 176 %3 = alloca ptr, align 8 177 %4 = getelementptr inbounds nuw i8, ptr %0, i64 2 178 %5 = load i8, ptr %4, align 2, !tbaa !55 179 %6 = and i8 %5, 2 180 %7 = icmp eq i8 %6, 0 181 %8 = load i8, ptr %0, align 8, !tbaa !61 182 %9 = icmp ne i8 %8, 0 183 br i1 %7, label %21, label %10 184 18510: ; preds = %2 186 %11 = tail call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x() 187 %12 = icmp eq i32 %11, 0 188 br i1 %12, label %13, label %14 189 19013: ; preds = %10 191 store i32 1, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62 192 store i8 0, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE to ptr), i64 512) to ptr addrspace(3)), align 1, !tbaa !63 193 tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(48) addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i8 noundef 0, i64 noundef 16, i1 noundef false) 194 store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 16) to ptr addrspace(3)), align 8, !tbaa !64 195 store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 20) to ptr addrspace(3)), align 4, !tbaa !69 196 
store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 24) to ptr addrspace(3)), align 8, !tbaa !70 197 store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 28) to ptr addrspace(3)), align 4, !tbaa !71 198 store i32 0, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 32) to ptr addrspace(3)), align 8, !tbaa !72 199 store ptr null, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 40) to ptr addrspace(3)), align 8, !tbaa !73 200 store ptr null, ptr addrspace(3) @_ZN4ompx5state12ThreadStatesE, align 8, !tbaa !74 201 store ptr %0, ptr addrspace(3) @_ZL20KernelEnvironmentPtr, align 8, !tbaa !76 202 store ptr %1, ptr addrspace(3) @_ZL26KernelLaunchEnvironmentPtr, align 8, !tbaa !78 203 br label %18 204 20514: ; preds = %10 206 %15 = zext nneg i32 %11 to i64 207 %16 = getelementptr inbounds nuw [1024 x i8], ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE to ptr), i64 512), i64 0, i64 %15 208 %17 = addrspacecast ptr %16 to ptr addrspace(3) 209 store i8 0, ptr addrspace(3) %17, align 1, !tbaa !63 210 br label %18 211 21218: ; preds = %14, %13 213 br i1 %12, label %19, label %20 214 21519: ; preds = %18 216 store ptr null, ptr addrspace(3) @_ZL9ThreadDST, align 8, !tbaa !80 217 br label %20 218 21920: ; preds = %18, %19 220 tail call void @_ZN4ompx11synchronize14threadsAlignedENS_6atomic10OrderingTyE(i32 poison) #21 221 br label %37 222 22321: ; preds = %2 224 %22 = tail call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range !82 225 %23 = add nsw i32 %22, -1 226 %24 = and i32 %23, -32 227 %25 = tail call 
range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x() 228 %26 = icmp eq i32 %25, %24 229 br i1 %26, label %27, label %31 230 23127: ; preds = %21 232 store i32 0, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62 233 %28 = zext nneg i32 %25 to i64 234 %29 = getelementptr inbounds nuw [1024 x i8], ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE to ptr), i64 512), i64 0, i64 %28 235 %30 = addrspacecast ptr %29 to ptr addrspace(3) 236 store i8 0, ptr addrspace(3) %30, align 1, !tbaa !63 237 tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(48) addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i8 noundef 0, i64 noundef 16, i1 noundef false) 238 store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 16) to ptr addrspace(3)), align 8, !tbaa !64 239 store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 20) to ptr addrspace(3)), align 4, !tbaa !69 240 store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 24) to ptr addrspace(3)), align 8, !tbaa !70 241 store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 28) to ptr addrspace(3)), align 4, !tbaa !71 242 store i32 0, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 32) to ptr addrspace(3)), align 8, !tbaa !72 243 store ptr null, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 40) to ptr addrspace(3)), align 8, !tbaa !73 
244 store ptr null, ptr addrspace(3) @_ZN4ompx5state12ThreadStatesE, align 8, !tbaa !74 245 store ptr %0, ptr addrspace(3) @_ZL20KernelEnvironmentPtr, align 8, !tbaa !76 246 store ptr %1, ptr addrspace(3) @_ZL26KernelLaunchEnvironmentPtr, align 8, !tbaa !78 247 br label %35 248 24931: ; preds = %21 250 %32 = zext nneg i32 %25 to i64 251 %33 = getelementptr inbounds nuw [1024 x i8], ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE to ptr), i64 512), i64 0, i64 %32 252 %34 = addrspacecast ptr %33 to ptr addrspace(3) 253 store i8 0, ptr addrspace(3) %34, align 1, !tbaa !63 254 br label %35 255 25635: ; preds = %31, %27 257 br i1 %26, label %36, label %37 258 25936: ; preds = %35 260 store ptr null, ptr addrspace(3) @_ZL9ThreadDST, align 8, !tbaa !80 261 br label %37 262 26337: ; preds = %36, %35, %20 264 br i1 %7, label %100, label %38 265 26638: ; preds = %37 267 %39 = load i32, ptr @__omp_rtl_debug_kind, align 4, !tbaa !62 268 %40 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8, !tbaa !83 269 %41 = and i32 %39, 1 270 %42 = and i32 %41, %40 271 %43 = icmp ne i32 %42, 0 272 %44 = load i32, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 8, !tbaa !86 273 %45 = icmp ne i32 %44, 0 274 %46 = select i1 %43, i1 %45, i1 false 275 br i1 %46, label %47, label %48 276 27747: ; preds = %38 278 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(33) @.str747, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 193, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22 279 unreachable 280 28148: ; preds = %38 282 %49 = icmp eq i32 %44, 0 283 tail call void @llvm.assume(i1 noundef %49) #23 284 %50 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 4) to ptr addrspace(3)), 
align 4, !tbaa !87 285 br i1 %43, label %51, label %54 286 28751: ; preds = %48 288 %52 = icmp eq i32 %50, 0 289 br i1 %52, label %54, label %53 290 29153: ; preds = %51 292 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(27) @.str848, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 194, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22 293 unreachable 294 29554: ; preds = %51, %48 296 %55 = phi i32 [ 0, %51 ], [ %50, %48 ] 297 %56 = icmp eq i32 %55, 0 298 tail call void @llvm.assume(i1 noundef %56) #23 299 %57 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 8) to ptr addrspace(3)), align 8, !tbaa !88 300 br i1 %43, label %58, label %61 301 30258: ; preds = %54 303 %59 = icmp eq i32 %57, 0 304 br i1 %59, label %61, label %60 305 30660: ; preds = %58 307 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(39) @.str949, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 195, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22 308 unreachable 309 31061: ; preds = %58, %54 311 %62 = phi i32 [ 0, %58 ], [ %57, %54 ] 312 %63 = icmp eq i32 %62, 0 313 tail call void @llvm.assume(i1 noundef %63) #23 314 %64 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 16) to ptr addrspace(3)), align 8, !tbaa !89 315 br i1 %43, label %65, label %68 316 31765: ; preds = %61 318 %66 = icmp eq i32 %64, 1 319 br i1 %66, label %68, label %67 320 32167: ; preds = %65 322 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(47) @.str1050, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 
noundef 196, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22 323 unreachable 324 32568: ; preds = %65, %61 326 %69 = phi i32 [ 1, %65 ], [ %64, %61 ] 327 %70 = icmp eq i32 %69, 1 328 tail call void @llvm.assume(i1 noundef %70) #23 329 %71 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 20) to ptr addrspace(3)), align 4, !tbaa !90 330 br i1 %43, label %72, label %93 331 33272: ; preds = %68 333 %73 = icmp eq i32 %71, 1 334 br i1 %73, label %75, label %74 335 33674: ; preds = %72 337 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(33) @.str1151, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 197, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22 338 unreachable 339 34075: ; preds = %72 341 %76 = icmp eq i32 1, 1 342 tail call void @llvm.assume(i1 noundef %76) #23 343 br i1 %43, label %77, label %95 344 34577: ; preds = %75 346 %78 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 24) to ptr addrspace(3)), align 8, !tbaa !91 347 %79 = icmp eq i32 %78, 1 348 br i1 %79, label %81, label %80 349 35080: ; preds = %77 351 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(43) @.str1252, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 198, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22 352 unreachable 353 35481: ; preds = %77 355 %82 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 28) to ptr addrspace(3)), align 4, !tbaa !71 356 %83 = icmp 
eq i32 %82, 1 357 br i1 %83, label %85, label %84 358 35984: ; preds = %81 360 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(43) @.str13, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 222, ptr noundef nonnull dereferenceable(64) @__PRETTY_FUNCTION__._ZNK4ompx5state11TeamStateTy11assertEqualERS1_) #22 361 unreachable 362 36385: ; preds = %81 364 %86 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 32) to ptr addrspace(3)), align 8, !tbaa !72 365 %87 = icmp eq i32 %86, 0 366 br i1 %87, label %89, label %88 367 36888: ; preds = %85 369 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(39) @.str14, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 223, ptr noundef nonnull dereferenceable(64) @__PRETTY_FUNCTION__._ZNK4ompx5state11TeamStateTy11assertEqualERS1_) #22 370 unreachable 371 37289: ; preds = %85 373 %90 = load i32, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62 374 %91 = icmp eq i32 %90, 0 375 br i1 %91, label %92, label %98 376 37792: ; preds = %89 378 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(32) @.str23, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 326, ptr noundef nonnull dereferenceable(43) @__PRETTY_FUNCTION__._ZN4ompx5state18assumeInitialStateEb) #22 379 unreachable 380 38193: ; preds = %68 382 %94 = icmp eq i32 %71, 1 383 tail call void @llvm.assume(i1 noundef %94) #23 384 br label %95 385 38695: ; preds = %75, %93 387 %96 = load i32, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62 388 %97 = icmp ne i32 %96, 0 389 br label %98 390 39198: ; preds = %89, %95 392 %99 = phi i1 [ %97, %95 ], [ true, %89 ] 393 tail call void @llvm.assume(i1 noundef %99) #23 394 tail call void @_ZN4ompx11synchronize14threadsAlignedENS_6atomic10OrderingTyE(i32 
poison) #21 395 br label %130 396 397100: ; preds = %37 398 %101 = tail call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range !82 399 %102 = add nsw i32 %101, -1 400 %103 = and i32 %102, -32 401 %104 = tail call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !92 402 %105 = icmp eq i32 %104, %103 403 br i1 %105, label %130, label %106 404 405106: ; preds = %100 406 %107 = add nsw i32 %101, -32 407 %108 = icmp ult i32 %104, %107 408 %109 = select i1 %9, i1 %108, i1 false 409 br i1 %109, label %110, label %130 410 411110: ; preds = %106 412 %111 = load i32, ptr @__omp_rtl_debug_kind, align 4 413 %112 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8 414 %113 = and i32 %111, 1 415 %114 = and i32 %113, %112 416 %115 = icmp ne i32 %114, 0 417 br label %116 418 419116: ; preds = %110, %128 420 call void @llvm.lifetime.start.p0(i64 noundef 8, ptr noundef nonnull align 8 dereferenceable(8) %3) #20 421 tail call void @llvm.nvvm.barrier.sync(i32 noundef 8) 422 %117 = call zeroext i1 @__kmpc_kernel_parallel(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) %3) #20 423 %118 = load ptr, ptr %3, align 8, !tbaa !93 424 %119 = icmp eq ptr %118, null 425 br i1 %119, label %129, label %120 426 427120: ; preds = %116 428 br i1 %117, label %121, label %128 429 430121: ; preds = %120 431 %122 = load i32, ptr addrspace(3) @IsSPMDMode, align 4 432 %123 = icmp ne i32 %122, 0 433 %124 = select i1 %115, i1 %123, i1 false 434 br i1 %124, label %125, label %126 435 436125: ; preds = %121 437 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(23) @.str12, ptr noundef null, ptr noundef nonnull dereferenceable(67) @.str15, i32 noundef 60, ptr noundef nonnull dereferenceable(36) @__PRETTY_FUNCTION__._ZL19genericStateMachineP7IdentTy) #22 438 unreachable 439 440126: ; preds = %121 441 %127 = icmp eq i32 %122, 0 442 tail call void @llvm.assume(i1 noundef %127) #23 443 tail call void 
%118(i32 noundef 0, i32 noundef %104) #24 444 tail call void @__kmpc_kernel_end_parallel() #24 445 br label %128 446 447128: ; preds = %126, %120 448 tail call void @llvm.nvvm.barrier.sync(i32 noundef 8) 449 call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %3) #20 450 br label %116, !llvm.loop !94 451 452129: ; preds = %116 453 call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %3) #20 454 br label %130 455 456130: ; preds = %106, %129, %100, %98 457 %131 = phi i32 [ -1, %98 ], [ -1, %100 ], [ %104, %129 ], [ %104, %106 ] 458 ret i32 %131 459} 460 461; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) 462declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #5 463 464; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) 465declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #6 466 467; Function Attrs: convergent mustprogress noinline norecurse nounwind 468define internal void @_ZN4ompx11synchronize14threadsAlignedENS_6atomic10OrderingTyE(i32 %0) local_unnamed_addr #7 { 469 tail call void @llvm.nvvm.barrier0() #25 470 ret void 471} 472 473; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) 474declare noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #5 475 476; Function Attrs: cold convergent mustprogress noreturn nounwind 477define internal fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(8) %0, ptr noundef %1, ptr noundef nonnull dereferenceable(66) %2, i32 noundef range(i32 60, 905) %3, ptr noundef nonnull dereferenceable(20) %4) unnamed_addr #8 { 478 %6 = icmp eq ptr %1, null 479 br i1 %6, label %9, label %7 480 4817: ; preds = %5 482 %8 = tail call noundef i32 (ptr, ...) 
@_ZN4ompx6printfEPKcz(ptr noundef nonnull dereferenceable(40) @.str, ptr noundef nonnull dereferenceable(66) %2, i32 noundef %3, ptr noundef nonnull dereferenceable(20) %4, ptr noundef nonnull %1, ptr noundef nonnull dereferenceable(8) %0) #24 483 br label %11 484 4859: ; preds = %5 486 %10 = tail call noundef i32 (ptr, ...) @_ZN4ompx6printfEPKcz(ptr noundef nonnull dereferenceable(35) @.str1, ptr noundef nonnull dereferenceable(66) %2, i32 noundef %3, ptr noundef nonnull dereferenceable(20) %4, ptr noundef nonnull dereferenceable(8) %0) #24 487 br label %11 488 48911: ; preds = %9, %7 490 tail call void @llvm.trap() #26 491 unreachable 492} 493 494; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) 495declare void @llvm.assume(i1 noundef) #9 496 497; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) 498declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #10 499 500; Function Attrs: convergent nocallback nounwind 501declare void @llvm.nvvm.barrier.sync(i32) #11 502 503; Function Attrs: convergent mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read, argmem: write, inaccessiblemem: none) 504define internal noundef zeroext i1 @__kmpc_kernel_parallel(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) initializes((0, 8)) %0) local_unnamed_addr #12 { 505 %2 = load ptr, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 40) to ptr addrspace(3)), align 8, !tbaa !93 506 store ptr %2, ptr %0, align 8, !tbaa !93 507 %3 = icmp eq ptr %2, null 508 br i1 %3, label %15, label %4 509 5104: ; preds = %1 511 %5 = tail call noundef range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x() #27, !range !92 512 %6 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to 
ptr), i64 28) to ptr addrspace(3)), align 4, !tbaa !62 513 %7 = icmp eq i32 %6, 0 514 %8 = tail call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range !82 515 %9 = load i32, ptr addrspace(3) @IsSPMDMode, align 4 516 %10 = icmp eq i32 %9, 0 517 %11 = select i1 %10, i32 -32, i32 0 518 %12 = add nsw i32 %11, %8 519 %13 = select i1 %7, i32 %12, i32 %6 520 %14 = icmp ult i32 %5, %13 521 br label %15 522 52315: ; preds = %4, %1 524 %16 = phi i1 [ %14, %4 ], [ false, %1 ] 525 ret i1 %16 526} 527 528; Function Attrs: convergent mustprogress noinline nounwind 529define internal void @__kmpc_kernel_end_parallel() local_unnamed_addr #13 { 530 %1 = load i32, ptr @__omp_rtl_debug_kind, align 4, !tbaa !62 531 %2 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8, !tbaa !83 532 %3 = and i32 %1, 1 533 %4 = and i32 %3, %2 534 %5 = icmp ne i32 %4, 0 535 %6 = load i32, ptr addrspace(3) @IsSPMDMode, align 4 536 %7 = icmp ne i32 %6, 0 537 %8 = select i1 %5, i1 %7, i1 false 538 br i1 %8, label %9, label %10 539 5409: ; preds = %0 541 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(23) @.str12, ptr noundef null, ptr noundef nonnull dereferenceable(72) @.str1027, i32 noundef 299, ptr noundef nonnull dereferenceable(34) @__PRETTY_FUNCTION__.__kmpc_kernel_end_parallel) #22 542 unreachable 543 54410: ; preds = %0 545 %11 = icmp eq i32 %6, 0 546 tail call void @llvm.assume(i1 noundef %11) #23 547 %12 = load i32, ptr @__omp_rtl_assume_no_thread_state, align 4, !tbaa !62 548 %13 = icmp eq i32 %12, 0 549 %14 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 32) to ptr addrspace(3)), align 8 550 %15 = icmp ne i32 %14, 0 551 %16 = select i1 %13, i1 %15, i1 false 552 br i1 %16, label %17, label %30 553 55417: ; preds = %10 555 %18 = tail call noundef range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x() #27, !range !92 556 %19 
= load ptr, ptr addrspace(3) @_ZN4ompx5state12ThreadStatesE, align 8, !tbaa !74 557 %20 = zext nneg i32 %18 to i64 558 %21 = getelementptr inbounds nuw ptr, ptr %19, i64 %20 559 %22 = load ptr, ptr %21, align 8, !tbaa !96 560 %23 = icmp eq ptr %22, null 561 br i1 %23, label %30, label %24, !prof !98 562 56324: ; preds = %17 564 %25 = getelementptr inbounds nuw i8, ptr %22, i64 32 565 %26 = load ptr, ptr %25, align 8, !tbaa !99 566 tail call void @free(ptr noundef nonnull dereferenceable(40) %22) #28 567 %27 = load ptr, ptr addrspace(3) @_ZN4ompx5state12ThreadStatesE, align 8, !tbaa !74 568 %28 = getelementptr inbounds nuw ptr, ptr %27, i64 %20 569 store ptr %26, ptr %28, align 8, !tbaa !96 570 %29 = load i32, ptr addrspace(3) @IsSPMDMode, align 4 571 br label %30 572 57330: ; preds = %10, %17, %24 574 %31 = phi i32 [ 0, %10 ], [ 0, %17 ], [ %29, %24 ] 575 %32 = icmp ne i32 %31, 0 576 %33 = select i1 %5, i1 %32, i1 false 577 br i1 %33, label %34, label %35 578 57934: ; preds = %30 580 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(23) @.str12, ptr noundef null, ptr noundef nonnull dereferenceable(72) @.str1027, i32 noundef 302, ptr noundef nonnull dereferenceable(34) @__PRETTY_FUNCTION__.__kmpc_kernel_end_parallel) #22 581 unreachable 582 58335: ; preds = %30 584 %36 = icmp eq i32 %31, 0 585 tail call void @llvm.assume(i1 noundef %36) #23 586 ret void 587} 588 589; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) 590declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #10 591 592; Function Attrs: convergent mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) 593declare extern_weak void @free(ptr allocptr nocapture noundef) local_unnamed_addr #14 594 595; Function Attrs: convergent mustprogress nounwind 596define internal noundef i32 @_ZN4ompx6printfEPKcz(ptr noundef %0, ...) 
local_unnamed_addr #15 { 597 %2 = alloca ptr, align 8 598 call void @llvm.lifetime.start.p0(i64 noundef 8, ptr noundef nonnull align 8 %2) #29 599 call void @llvm.va_start.p0(ptr noundef nonnull align 8 %2) #27 600 %3 = load ptr, ptr %2, align 8, !tbaa !101 601 %4 = call i32 @vprintf(ptr noundef %0, ptr noundef %3) #24 602 call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %2) #20 603 ret i32 %4 604} 605 606; Function Attrs: cold noreturn nounwind memory(inaccessiblemem: write) 607declare void @llvm.trap() #16 608 609; Function Attrs: nocallback nofree nosync nounwind willreturn 610declare void @llvm.va_start.p0(ptr) #17 611 612; Function Attrs: convergent nounwind 613declare i32 @vprintf(ptr noundef, ptr noundef) local_unnamed_addr #18 614 615; Function Attrs: convergent nocallback nounwind 616declare void @llvm.nvvm.barrier0() #11 617 618; Function Attrs: convergent mustprogress nounwind 619define internal void @__kmpc_target_deinit() #4 { 620 %1 = alloca ptr, align 8 621 %2 = load i32, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62 622 %3 = icmp eq i32 %2, 0 623 br i1 %3, label %4, label %27 624 6254: ; preds = %0 626 %5 = tail call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range !82 627 %6 = add nsw i32 %5, -1 628 %7 = and i32 %6, -32 629 %8 = tail call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !92 630 %9 = icmp eq i32 %8, %7 631 br i1 %9, label %10, label %11 632 63310: ; preds = %4 634 store ptr null, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 40) to ptr addrspace(3)), align 8, !tbaa !93 635 br label %27 636 63711: ; preds = %4 638 %12 = load ptr, ptr addrspace(3) @_ZL20KernelEnvironmentPtr, align 8, !tbaa !76 639 %13 = load i8, ptr %12, align 8, !tbaa !103 640 %14 = icmp eq i8 %13, 0 641 br i1 %14, label %15, label %27 642 64315: ; preds = %11 644 call void @llvm.lifetime.start.p0(i64 noundef 8, ptr 
noundef nonnull align 8 dereferenceable(8) %1) #29 645 %16 = call zeroext i1 @__kmpc_kernel_parallel(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) %1) #20 646 %17 = load i32, ptr @__omp_rtl_debug_kind, align 4, !tbaa !62 647 %18 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8, !tbaa !83 648 %19 = and i32 %17, 1 649 %20 = and i32 %19, %18 650 %21 = icmp eq i32 %20, 0 651 %22 = load ptr, ptr %1, align 8 652 %23 = icmp eq ptr %22, null 653 %24 = select i1 %21, i1 true, i1 %23 654 br i1 %24, label %26, label %25 655 65625: ; preds = %15 657 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(18) @.str2, ptr noundef null, ptr noundef nonnull dereferenceable(67) @.str15, i32 noundef 152, ptr noundef nonnull dereferenceable(28) @__PRETTY_FUNCTION__.__kmpc_target_deinit) #22 658 unreachable 659 66026: ; preds = %15 661 tail call void @llvm.assume(i1 noundef %23) #23 662 call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %1) #20 663 br label %27 664 66527: ; preds = %26, %11, %10, %0 666 ret void 667} 668 669attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx83,+sm_70" } 670attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "frame-pointer"="all" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="128" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx83,+sm_70" } 671attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx83,+sm_70" } 672attributes #3 = { convergent noinline nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx83,+sm_70" } 673attributes #4 = { convergent mustprogress 
nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } 674attributes #5 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } 675attributes #6 = { nocallback nofree nounwind willreturn memory(argmem: write) } 676attributes #7 = { convergent mustprogress noinline norecurse nounwind "frame-pointer"="all" "llvm.assume"="ompx_aligned_barrier" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } 677attributes #8 = { cold convergent mustprogress noreturn nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } 678attributes #9 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } 679attributes #10 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } 680attributes #11 = { convergent nocallback nounwind } 681attributes #12 = { convergent mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read, argmem: write, inaccessiblemem: none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } 682attributes #13 = { convergent mustprogress noinline nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } 683attributes #14 = { convergent mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } 684attributes #15 = { convergent mustprogress nounwind "frame-pointer"="all" "no-builtin-printf" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } 685attributes #16 = { cold noreturn nounwind memory(inaccessiblemem: write) } 686attributes #17 = { nocallback nofree nosync nounwind willreturn } 687attributes #18 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } 688attributes #19 = { convergent } 689attributes #20 = { nounwind } 690attributes #21 = { convergent nounwind "llvm.assume"="ompx_aligned_barrier" } 691attributes #22 = { convergent noreturn nounwind } 692attributes #23 = { memory(write) } 693attributes #24 = { convergent nounwind } 694attributes #25 = { "llvm.assume"="ompx_aligned_barrier" } 695attributes #26 = { noreturn } 696attributes #27 = { nofree willreturn } 697attributes #28 = { convergent nounwind willreturn } 698attributes #29 = { nofree nounwind willreturn } 699 700!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10} 701!llvm.dbg.cu = !{!11} 702!nvvm.annotations = !{!13} 703!omp_offload.info = !{!14} 704!llvm.ident = !{!15, !16, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15} 705!nvvmir.version = !{!17} 706 707!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 12, i32 3]} 708!1 = !{i32 7, !"Dwarf Version", i32 2} 709!2 = !{i32 2, !"Debug Info Version", i32 3} 710!3 = !{i32 1, !"wchar_size", i32 4} 711!4 = !{i32 4, !"nvvm-reflect-ftz", i32 0} 712!5 = !{i32 7, !"openmp", i32 51} 713!6 = !{i32 7, !"openmp-device", i32 51} 714!7 = !{i32 8, !"PIC Level", i32 2} 715!8 = !{i32 7, !"frame-pointer", i32 2} 716!9 = !{i32 1, !"ThinLTO", i32 0} 717!10 = !{i32 1, !"EnableSplitLTOUnit", i32 1} 718!11 = distinct !DICompileUnit(language: DW_LANG_C11, file: !12, producer: "clang version 20.0.0git (/tmp/llvm/clang b9447c03a9ef2eed55b685a33511df86f7f94e89)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: 
None) 719!12 = !DIFile(filename: "test.c", directory: "/tmp") 720!13 = !{ptr @__omp_offloading_fd02_1116d6_h_l12, !"maxntidx", i32 128} 721!14 = !{i32 0, i32 64770, i32 1119958, !"h", i32 12, i32 0, i32 0} 722!15 = !{!"clang version 20.0.0git (/tmp/llvm/clang b9447c03a9ef2eed55b685a33511df86f7f94e89)"} 723!16 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"} 724!17 = !{i32 2, i32 0} 725!18 = distinct !DISubprogram(name: "__omp_offloading_fd02_1116d6_h_l12_debug__", scope: !12, file: !12, line: 13, type: !19, scopeLine: 13, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !11, retainedNodes: !24) 726!19 = !DISubroutineType(types: !20) 727!20 = !{null, !21} 728!21 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !22) 729!22 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !23) 730!23 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) 731!24 = !{} 732!25 = !DILocalVariable(name: "dyn_ptr", arg: 1, scope: !18, type: !21, flags: DIFlagArtificial) 733!26 = !DILocation(line: 0, scope: !18) 734!27 = !DILocation(line: 13, column: 3, scope: !18) 735!28 = !DILocalVariable(name: "i", scope: !29, file: !12, line: 14, type: !30) 736!29 = distinct !DILexicalBlock(scope: !18, file: !12, line: 13, column: 3) 737!30 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) 738!31 = !DILocation(line: 14, column: 9, scope: !29) 739!32 = !DILocalVariable(name: "a", scope: !29, file: !12, line: 15, type: !33) 740!33 = !DICompositeType(tag: DW_TAG_array_type, baseType: !30, size: 64, elements: !34) 741!34 = !{!35} 742!35 = !DISubrange(count: 2) 743!36 = !DILocation(line: 15, column: 9, scope: !29) 744!37 = !DILocation(line: 16, column: 5, scope: !29) 745!38 = !DILocation(line: 17, column: 5, scope: !29) 746!39 = !DILocation(line: 18, column: 3, scope: !29) 747!40 = !DILocation(line: 18, column: 3, scope: !18) 748!41 = distinct !DISubprogram(name: "__omp_offloading_fd02_1116d6_h_l12", scope: !12, file: 
!12, line: 12, type: !19, scopeLine: 12, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !11, retainedNodes: !24) 749!42 = !DILocalVariable(name: "dyn_ptr", arg: 1, scope: !41, type: !21, flags: DIFlagArtificial) 750!43 = !DILocation(line: 0, scope: !41) 751!44 = !DILocation(line: 12, column: 1, scope: !41) 752!45 = distinct !DISubprogram(name: "g", scope: !12, file: !12, line: 3, type: !46, scopeLine: 3, spFlags: DISPFlagDefinition, unit: !11, retainedNodes: !24) 753!46 = !DISubroutineType(types: !47) 754!47 = !{null} 755!48 = !DILocalVariable(name: "i", scope: !45, file: !12, line: 4, type: !30) 756!49 = !DILocation(line: 4, column: 7, scope: !45) 757!50 = !DILocalVariable(name: "a", scope: !45, file: !12, line: 5, type: !33) 758!51 = !DILocation(line: 5, column: 7, scope: !45) 759!52 = !DILocation(line: 6, column: 3, scope: !45) 760!53 = !DILocation(line: 7, column: 3, scope: !45) 761!54 = !DILocation(line: 8, column: 1, scope: !45) 762!55 = !{!56, !59, i64 2} 763!56 = !{!"_ZTS26ConfigurationEnvironmentTy", !57, i64 0, !57, i64 1, !59, i64 2, !60, i64 4, !60, i64 8, !60, i64 12, !60, i64 16, !60, i64 20, !60, i64 24} 764!57 = !{!"omnipotent char", !58, i64 0} 765!58 = !{!"Simple C++ TBAA"} 766!59 = !{!"_ZTSN4llvm3omp19OMPTgtExecModeFlagsE", !57, i64 0} 767!60 = !{!"int", !57, i64 0} 768!61 = !{!56, !57, i64 0} 769!62 = !{!60, !60, i64 0} 770!63 = !{!57, !57, i64 0} 771!64 = !{!65, !60, i64 16} 772!65 = !{!"_ZTSN4ompx5state11TeamStateTyE", !66, i64 0, !60, i64 28, !60, i64 32, !67, i64 40} 773!66 = !{!"_ZTSN4ompx5state10ICVStateTyE", !60, i64 0, !60, i64 4, !60, i64 8, !60, i64 12, !60, i64 16, !60, i64 20, !60, i64 24} 774!67 = !{!"p1 void", !68, i64 0} 775!68 = !{!"any pointer", !57, i64 0} 776!69 = !{!65, !60, i64 20} 777!70 = !{!65, !60, i64 24} 778!71 = !{!65, !60, i64 28} 779!72 = !{!65, !60, i64 32} 780!73 = !{!65, !67, i64 40} 781!74 = !{!75, !75, i64 0} 782!75 = !{!"p2 
_ZTSN4ompx5state13ThreadStateTyE", !68, i64 0} 783!76 = !{!77, !77, i64 0} 784!77 = !{!"p1 _ZTS19KernelEnvironmentTy", !68, i64 0} 785!78 = !{!79, !79, i64 0} 786!79 = !{!"p1 _ZTS25KernelLaunchEnvironmentTy", !68, i64 0} 787!80 = !{!81, !81, i64 0} 788!81 = !{!"p2 _ZTS22DynamicScheduleTracker", !68, i64 0} 789!82 = !{i32 1, i32 1025} 790!83 = !{!84, !60, i64 0} 791!84 = !{!"_ZTS19DeviceEnvironmentTy", !60, i64 0, !60, i64 4, !60, i64 8, !60, i64 12, !85, i64 16, !85, i64 24, !85, i64 32, !85, i64 40} 792!85 = !{!"long", !57, i64 0} 793!86 = !{!66, !60, i64 0} 794!87 = !{!66, !60, i64 4} 795!88 = !{!66, !60, i64 8} 796!89 = !{!66, !60, i64 16} 797!90 = !{!66, !60, i64 20} 798!91 = !{!66, !60, i64 24} 799!92 = !{i32 0, i32 1024} 800!93 = !{!67, !67, i64 0} 801!94 = distinct !{!94, !95} 802!95 = !{!"llvm.loop.mustprogress"} 803!96 = !{!97, !97, i64 0} 804!97 = !{!"p1 _ZTSN4ompx5state13ThreadStateTyE", !68, i64 0} 805!98 = !{!"branch_weights", !"expected", i32 2000, i32 1} 806!99 = !{!100, !97, i64 32} 807!100 = !{!"_ZTSN4ompx5state13ThreadStateTyE", !66, i64 0, !97, i64 32} 808!101 = !{!102, !102, i64 0} 809!102 = !{!"p1 omnipotent char", !68, i64 0} 810!103 = !{!104, !57, i64 0} 811!104 = !{!"_ZTS19KernelEnvironmentTy", !56, i64 0, !105, i64 32, !106, i64 40} 812!105 = !{!"p1 _ZTS7IdentTy", !68, i64 0} 813!106 = !{!"p1 _ZTS20DynamicEnvironmentTy", !68, i64 0} 814