xref: /llvm-project/llvm/test/Analysis/KernelInfo/openmp/nvptx.ll (revision 18f8106f310ee702046a11f360af47947c030d2e)
1; See ./README.md for how to maintain the LLVM IR in this test.
2
3; REQUIRES: nvptx-registered-target
4
5; RUN: opt -pass-remarks=kernel-info -passes=kernel-info \
6; RUN:     -disable-output %s 2>&1 | \
7; RUN:   FileCheck -match-full-lines %s
8
9;  CHECK-NOT: remark:
10;      CHECK: remark: test.c:0:0: in artificial function '[[OFF_FUNC:__omp_offloading_[a-f0-9_]*_h_l12]]_debug__', artificial alloca ('%[[#]]') for 'dyn_ptr' with static size of 8 bytes
11; CHECK-NEXT: remark: test.c:14:9: in artificial function '[[OFF_FUNC]]_debug__', alloca ('%[[#]]') for 'i' with static size of 4 bytes
12; CHECK-NEXT: remark: test.c:15:9: in artificial function '[[OFF_FUNC]]_debug__', alloca ('%[[#]]') for 'a' with static size of 8 bytes
13; CHECK-NEXT: remark: <unknown>:0:0: in artificial function '[[OFF_FUNC]]_debug__', 'store' instruction accesses memory in flat address space
14; CHECK-NEXT: remark: test.c:13:3: in artificial function '[[OFF_FUNC]]_debug__', direct call to defined function, callee is '@__kmpc_target_init'
15; CHECK-NEXT: remark: test.c:16:5: in artificial function '[[OFF_FUNC]]_debug__', direct call, callee is '@f'
16; CHECK-NEXT: remark: test.c:17:5: in artificial function '[[OFF_FUNC]]_debug__', direct call to defined function, callee is 'g'
17; CHECK-NEXT: remark: test.c:18:3: in artificial function '[[OFF_FUNC]]_debug__', direct call to defined function, callee is '@__kmpc_target_deinit'
18; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', ExternalNotKernel = 0
19; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', Allocas = 3
20; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', AllocasStaticSizeSum = 20
21; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', AllocasDyn = 0
22; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', DirectCalls = 4
23; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', IndirectCalls = 0
24; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', DirectCallsToDefinedFunctions = 3
25; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', InlineAssemblyCalls = 0
26; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', Invokes = 0
27; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', FlatAddrspaceAccesses = 1
28
29; CHECK-NEXT: remark: test.c:0:0: in artificial function '[[OFF_FUNC]]', artificial alloca ('%[[#]]') for 'dyn_ptr' with static size of 8 bytes
30; CHECK-NEXT: remark: <unknown>:0:0: in artificial function '[[OFF_FUNC]]', 'store' instruction accesses memory in flat address space
31; CHECK-NEXT: remark: test.c:12:1: in artificial function '[[OFF_FUNC]]', 'load' instruction ('%[[#]]') accesses memory in flat address space
32; CHECK-NEXT: remark: test.c:12:1: in artificial function '[[OFF_FUNC]]', direct call to defined function, callee is artificial '[[OFF_FUNC]]_debug__'
33; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', ExternalNotKernel = 0
34; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', omp_target_thread_limit = 128
35; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', maxntidx = 128
36; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', Allocas = 1
37; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', AllocasStaticSizeSum = 8
38; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', AllocasDyn = 0
39; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', DirectCalls = 1
40; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', IndirectCalls = 0
41; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', DirectCallsToDefinedFunctions = 1
42; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', InlineAssemblyCalls = 0
43; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', Invokes = 0
44; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', FlatAddrspaceAccesses = 2
45
46; CHECK-NEXT: remark: test.c:4:7: in function 'g', alloca ('%[[#]]') for 'i' with static size of 4 bytes
47; CHECK-NEXT: remark: test.c:5:7: in function 'g', alloca ('%[[#]]') for 'a' with static size of 8 bytes
48; CHECK-NEXT: remark: test.c:6:3: in function 'g', direct call, callee is '@f'
49; CHECK-NEXT: remark: test.c:7:3: in function 'g', direct call to defined function, callee is 'g'
50; CHECK-NEXT: remark: test.c:3:0: in function 'g', ExternalNotKernel = 1
51; CHECK-NEXT: remark: test.c:3:0: in function 'g', Allocas = 2
52; CHECK-NEXT: remark: test.c:3:0: in function 'g', AllocasStaticSizeSum = 12
53; CHECK-NEXT: remark: test.c:3:0: in function 'g', AllocasDyn = 0
54; CHECK-NEXT: remark: test.c:3:0: in function 'g', DirectCalls = 2
55; CHECK-NEXT: remark: test.c:3:0: in function 'g', IndirectCalls = 0
56; CHECK-NEXT: remark: test.c:3:0: in function 'g', DirectCallsToDefinedFunctions = 1
57; CHECK-NEXT: remark: test.c:3:0: in function 'g', InlineAssemblyCalls = 0
58; CHECK-NEXT: remark: test.c:3:0: in function 'g', Invokes = 0
59; CHECK-NEXT: remark: test.c:3:0: in function 'g', FlatAddrspaceAccesses = 0
60;  CHECK-NOT: remark: {{.*: in function 'g',.*}}
61
62; A lot of internal functions (e.g., __kmpc_target_init) come next, but we don't
63; want to maintain a list of their allocas, calls, etc. in this test.
64
; Module header: records the original source file name and selects the
; 64-bit NVPTX data layout and target triple for every definition below.
65; ModuleID = 'test-openmp-nvptx64-nvidia-cuda-sm_70.bc'
66source_filename = "test.c"
67target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
68target triple = "nvptx64-nvidia-cuda"
69
; Named struct types referenced by the globals and functions below.
; NOTE(review): the names suggest they mirror OpenMP device-runtime
; structures; that is inferred from the names only - confirm via README.md.
70%struct.ident_t = type { i32, i32, i32, i32, ptr }
71%struct.DynamicEnvironmentTy = type { i16 }
; The kernel environment embeds the configuration struct by value, followed
; by two pointers (initialized below with @1 and the dynamic environment).
72%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
73%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
74%struct.DeviceMemoryPoolTy = type { ptr, i64 }
75%struct.DeviceMemoryPoolTrackingTy = type { i64, i64, i64, i64 }
76%struct.DeviceEnvironmentTy = type { i32, i32, i32, i32, i64, i64, i64, i64 }
77%"struct.rpc::Client" = type { %"struct.rpc::Process" }
78%"struct.rpc::Process" = type { i32, ptr, ptr, ptr, ptr, [128 x i32] }
; Two byte arrays; @__kmpc_target_init writes into the second one
; (byte offset 512 onward), indexed by the thread id.
79%"struct.(anonymous namespace)::SharedMemorySmartStackTy" = type { [512 x i8], [1024 x i8] }
; Team state = one ICV state, two i32 fields and a pointer.
80%"struct.ompx::state::TeamStateTy" = type { %"struct.ompx::state::ICVStateTy", i32, i32, ptr }
81%"struct.ompx::state::ICVStateTy" = type { i32, i32, i32, i32, i32, i32, i32 }
82
; Module globals.
;  * @0 / @1: a source-location string and the ident_t that points at it;
;    @1 is referenced from the kernel environment below.
;  * @..._kernel_environment: passed as the first argument to
;    @__kmpc_target_init by the _debug__ function.
;  * addrspace(3) / addrspace(4) globals: runtime state that
;    @__kmpc_target_init reads and writes through non-flat pointers.
;  * @.str* and @__PRETTY_FUNCTION__.*: message operands for
;    @__assert_fail_internal calls.
83@__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
84@__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
85@0 = private unnamed_addr constant [58 x i8] c";test.c;__omp_offloading_fd02_1116d6_h_l12_debug__;13;3;;\00", align 1
86@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 57, ptr @0 }, align 8
87@__omp_offloading_fd02_1116d6_h_l12_dynamic_environment = weak_odr protected global %struct.DynamicEnvironmentTy zeroinitializer
88@__omp_offloading_fd02_1116d6_h_l12_kernel_environment = weak_odr protected constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 1, i32 128, i32 -1, i32 -1, i32 0, i32 0 }, ptr @1, ptr @__omp_offloading_fd02_1116d6_h_l12_dynamic_environment }
; Keeps the four listed runtime globals alive even if nothing else in the
; module references them.
89@llvm.used = appending global [4 x ptr] [ptr @__llvm_rpc_client, ptr addrspacecast (ptr addrspace(4) @__omp_rtl_device_environment to ptr), ptr @__omp_rtl_device_memory_pool, ptr @__omp_rtl_device_memory_pool_tracker], section "llvm.metadata"
90@__omp_rtl_device_memory_pool = weak protected global %struct.DeviceMemoryPoolTy zeroinitializer, align 8
91@__omp_rtl_device_memory_pool_tracker = weak protected global %struct.DeviceMemoryPoolTrackingTy zeroinitializer, align 8
; Bit 0 of this value, combined with the first i32 of
; @__omp_rtl_device_environment, gates the assertions in @__kmpc_target_init.
92@__omp_rtl_debug_kind = weak_odr hidden constant i32 0
93@__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
94@__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
95@__omp_rtl_device_environment = weak protected addrspace(4) global %struct.DeviceEnvironmentTy undef, align 8
96@.str = private unnamed_addr constant [40 x i8] c"%s:%u: %s: Assertion %s (`%s`) failed.\0A\00", align 1
97@.str1 = private unnamed_addr constant [35 x i8] c"%s:%u: %s: Assertion `%s` failed.\0A\00", align 1
98@.str15 = private unnamed_addr constant [43 x i8] c"/tmp/llvm/offload/DeviceRTL/src/Kernel.cpp\00", align 1
99@__PRETTY_FUNCTION__._ZL19genericStateMachineP7IdentTy = private unnamed_addr constant [36 x i8] c"void genericStateMachine(IdentTy *)\00", align 1
100@.str2 = private unnamed_addr constant [18 x i8] c"WorkFn == nullptr\00", align 1
101@__PRETTY_FUNCTION__.__kmpc_target_deinit = private unnamed_addr constant [28 x i8] c"void __kmpc_target_deinit()\00", align 1
; Written by the single initializing thread in @__kmpc_target_init
; (1 on one path, 0 on the other) and re-read by its assertions.
102@IsSPMDMode = internal local_unnamed_addr addrspace(3) global i32 undef, align 4
103@__llvm_rpc_client = weak protected global %"struct.rpc::Client" zeroinitializer, align 8
104@.str1027 = private unnamed_addr constant [48 x i8] c"/tmp/llvm/offload/DeviceRTL/src/Parallelism.cpp\00", align 1
105@.str12 = private unnamed_addr constant [23 x i8] c"!mapping::isSPMDMode()\00", align 1
106@__PRETTY_FUNCTION__.__kmpc_kernel_end_parallel = private unnamed_addr constant [34 x i8] c"void __kmpc_kernel_end_parallel()\00", align 1
; Receive the two pointer arguments of @__kmpc_target_init.
107@_ZL20KernelEnvironmentPtr = internal unnamed_addr addrspace(3) global ptr undef, align 8
108@_ZL26KernelLaunchEnvironmentPtr = internal unnamed_addr addrspace(3) global ptr undef, align 8
109@_ZN12_GLOBAL__N_122SharedMemorySmartStackE = internal addrspace(3) global %"struct.(anonymous namespace)::SharedMemorySmartStackTy" undef, align 16
110@.str444 = private unnamed_addr constant [42 x i8] c"/tmp/llvm/offload/DeviceRTL/src/State.cpp\00", align 1
111@.str747 = private unnamed_addr constant [33 x i8] c"NThreadsVar == Other.NThreadsVar\00", align 1
112@__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_ = private unnamed_addr constant [68 x i8] c"void ompx::state::ICVStateTy::assertEqual(const ICVStateTy &) const\00", align 1
113@.str848 = private unnamed_addr constant [27 x i8] c"LevelVar == Other.LevelVar\00", align 1
114@.str949 = private unnamed_addr constant [39 x i8] c"ActiveLevelVar == Other.ActiveLevelVar\00", align 1
115@.str1050 = private unnamed_addr constant [47 x i8] c"MaxActiveLevelsVar == Other.MaxActiveLevelsVar\00", align 1
116@.str1151 = private unnamed_addr constant [33 x i8] c"RunSchedVar == Other.RunSchedVar\00", align 1
117@.str1252 = private unnamed_addr constant [43 x i8] c"RunSchedChunkVar == Other.RunSchedChunkVar\00", align 1
118@.str13 = private unnamed_addr constant [43 x i8] c"ParallelTeamSize == Other.ParallelTeamSize\00", align 1
119@__PRETTY_FUNCTION__._ZNK4ompx5state11TeamStateTy11assertEqualERS1_ = private unnamed_addr constant [64 x i8] c"void ompx::state::TeamStateTy::assertEqual(TeamStateTy &) const\00", align 1
120@.str14 = private unnamed_addr constant [39 x i8] c"HasThreadState == Other.HasThreadState\00", align 1
121@.str23 = private unnamed_addr constant [32 x i8] c"mapping::isSPMDMode() == IsSPMD\00", align 1
122@__PRETTY_FUNCTION__._ZN4ompx5state18assumeInitialStateEb = private unnamed_addr constant [43 x i8] c"void ompx::state::assumeInitialState(bool)\00", align 1
123@_ZL9ThreadDST = internal unnamed_addr addrspace(3) global ptr undef, align 8
124@_ZN4ompx5state9TeamStateE = internal local_unnamed_addr addrspace(3) global %"struct.ompx::state::TeamStateTy" undef, align 8
125@_ZN4ompx5state12ThreadStatesE = internal addrspace(3) global ptr undef, align 8
126
; Outlined body of the offloaded region (internal; called by the kernel entry
; point @__omp_offloading_fd02_1116d6_h_l12 defined right after it).
; It spills its pointer argument to a stack slot, calls @__kmpc_target_init
; with the kernel environment global, and only when that call returns -1 runs
; the region body (calls to @f and @g) followed by @__kmpc_target_deinit.
; The FileCheck expectations at the top of the file depend on this exact
; shape: three static allocas (8 + 4 + 8 = 20 bytes), one flat-address-space
; store, and four direct calls, three of which those expectations report as
; calls to defined functions.
127; Function Attrs: convergent noinline norecurse nounwind optnone
128define internal void @__omp_offloading_fd02_1116d6_h_l12_debug__(ptr noalias noundef %0) #0 !dbg !18 {
129  %2 = alloca ptr, align 8
130  %3 = alloca i32, align 4
131  %4 = alloca [2 x i32], align 4
; The only memory access in this function: a store through a plain
; (address space 0) pointer, i.e. the expected flat-address-space remark.
132  store ptr %0, ptr %2, align 8
133    #dbg_declare(ptr %2, !25, !DIExpression(), !26)
134  %5 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_1116d6_h_l12_kernel_environment, ptr %0), !dbg !27
; A result of -1 selects block %7 (region body); anything else returns
; immediately through block %8.
135  %6 = icmp eq i32 %5, -1, !dbg !27
136  br i1 %6, label %7, label %8, !dbg !27
137
1387:                                                ; preds = %1
139    #dbg_declare(ptr %3, !28, !DIExpression(), !31)
140    #dbg_declare(ptr %4, !32, !DIExpression(), !36)
141  call void @f() #19, !dbg !37
142  call void @g() #19, !dbg !38
143  call void @__kmpc_target_deinit(), !dbg !39
144  ret void, !dbg !40
145
1468:                                                ; preds = %1
147  ret void, !dbg !27
148}
149
; Kernel entry point (ptx_kernel calling convention). It stores its pointer
; argument to a stack slot, reloads it, and forwards it to the internal
; _debug__ function defined just before it. Both the store and the load use
; plain (address space 0) pointers, which is why the expectations at the top
; of the file list two flat-address-space accesses, one alloca of 8 bytes and
; a single direct call for this function.
150; Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
151define weak_odr protected ptx_kernel void @__omp_offloading_fd02_1116d6_h_l12(ptr noalias noundef %0) #1 !dbg !41 {
152  %2 = alloca ptr, align 8
153  store ptr %0, ptr %2, align 8
154    #dbg_declare(ptr %2, !42, !DIExpression(), !43)
155  %3 = load ptr, ptr %2, align 8, !dbg !44
156  call void @__omp_offloading_fd02_1116d6_h_l12_debug__(ptr %3) #20, !dbg !44
157  ret void, !dbg !44
158}
159
; External function with no body in this module. Calls to it are therefore
; expected to be reported as a plain "direct call" rather than a
; "direct call to defined function" (see the expectations at the top).
160; Function Attrs: convergent
161declare void @f(...) #2
162
; Non-kernel function with hidden visibility. It has two static allocas
; (4 + 8 = 12 bytes), calls the external @f, and then calls itself
; unconditionally. The test only runs opt with -disable-output, so this IR is
; analyzed but never executed and the unbounded recursion is harmless; the
; self-call is what produces the expected "direct call to defined function,
; callee is 'g'" remark.
163; Function Attrs: convergent noinline nounwind optnone
164define hidden void @g() #3 !dbg !45 {
165  %1 = alloca i32, align 4
166  %2 = alloca [2 x i32], align 4
167    #dbg_declare(ptr %1, !48, !DIExpression(), !49)
168    #dbg_declare(ptr %2, !50, !DIExpression(), !51)
169  call void @f() #19, !dbg !52
170  call void @g() #19, !dbg !53
171  ret void, !dbg !54
172}
173
; __kmpc_target_init(%0, %1) -> i32 in [-1, 1024) (per the range attribute).
;
; Visible behavior:
;  * Reads the byte at offset 2 of %0 and tests its bit 0x2 (%7 is true when
;    the bit is clear), and reads the byte at offset 0 of %0 (%9 is true when
;    it is non-zero).
;  * One thread initializes the addrspace(3) runtime state (@IsSPMDMode, the
;    team state, the thread-state pointers and the two environment pointers);
;    every thread zeroes one byte of the smart-stack global at byte offset
;    512 + its tid.x.
;  * Bit set: all threads reach a barrier, run the debug-gated consistency
;    assertions, reach a second barrier and return -1.
;  * Bit clear: the thread with tid.x == (ntid.x - 1) & -32 returns -1; the
;    other threads return their tid.x, after first running the worker loop at
;    block %116 when %9 is set and tid.x < ntid.x - 32.
; NOTE(review): the bit presumably distinguishes SPMD from generic execution
; mode (block %13 stores 1 to @IsSPMDMode, block %27 stores 0) - confirm
; against the device runtime sources.
174; Function Attrs: convergent mustprogress nounwind
175define internal noundef range(i32 -1, 1024) i32 @__kmpc_target_init(ptr nofree noundef nonnull align 8 dereferenceable(48) %0, ptr nofree noundef nonnull align 8 dereferenceable(16) %1) #4 {
; Out-parameter slot for @__kmpc_kernel_parallel in the worker loop.
176  %3 = alloca ptr, align 8
177  %4 = getelementptr inbounds nuw i8, ptr %0, i64 2
178  %5 = load i8, ptr %4, align 2, !tbaa !55
179  %6 = and i8 %5, 2
180  %7 = icmp eq i8 %6, 0
181  %8 = load i8, ptr %0, align 8, !tbaa !61
182  %9 = icmp ne i8 %8, 0
183  br i1 %7, label %21, label %10
184
; Bit set: the thread with tid.x == 0 performs the one-time setup.
18510:                                               ; preds = %2
186  %11 = tail call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
187  %12 = icmp eq i32 %11, 0
188  br i1 %12, label %13, label %14
189
; Initializing thread: IsSPMDMode = 1, clear the smart-stack byte at offset
; 512, zero the first 16 bytes of the team state, set the i32 fields at
; offsets 16, 20, 24 and 28 to 1, offset 32 to 0 and the pointer at offset 40
; to null, then publish %0 and %1 through the environment-pointer globals.
19013:                                               ; preds = %10
191  store i32 1, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62
192  store i8 0, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE to ptr), i64 512) to ptr addrspace(3)), align 1, !tbaa !63
193  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(48) addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i8 noundef 0, i64 noundef 16, i1 noundef false)
194  store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 16) to ptr addrspace(3)), align 8, !tbaa !64
195  store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 20) to ptr addrspace(3)), align 4, !tbaa !69
196  store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 24) to ptr addrspace(3)), align 8, !tbaa !70
197  store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 28) to ptr addrspace(3)), align 4, !tbaa !71
198  store i32 0, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 32) to ptr addrspace(3)), align 8, !tbaa !72
199  store ptr null, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 40) to ptr addrspace(3)), align 8, !tbaa !73
200  store ptr null, ptr addrspace(3) @_ZN4ompx5state12ThreadStatesE, align 8, !tbaa !74
201  store ptr %0, ptr addrspace(3) @_ZL20KernelEnvironmentPtr, align 8, !tbaa !76
202  store ptr %1, ptr addrspace(3) @_ZL26KernelLaunchEnvironmentPtr, align 8, !tbaa !78
203  br label %18
204
; Every other thread only clears its own smart-stack byte (512 + tid.x).
20514:                                               ; preds = %10
206  %15 = zext nneg i32 %11 to i64
207  %16 = getelementptr inbounds nuw [1024 x i8], ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE to ptr), i64 512), i64 0, i64 %15
208  %17 = addrspacecast ptr %16 to ptr addrspace(3)
209  store i8 0, ptr addrspace(3) %17, align 1, !tbaa !63
210  br label %18
211
21218:                                               ; preds = %14, %13
213  br i1 %12, label %19, label %20
214
21519:                                               ; preds = %18
216  store ptr null, ptr addrspace(3) @_ZL9ThreadDST, align 8, !tbaa !80
217  br label %20
218
; First barrier of the bit-set path (the callee wraps llvm.nvvm.barrier0).
21920:                                               ; preds = %18, %19
220  tail call void @_ZN4ompx11synchronize14threadsAlignedENS_6atomic10OrderingTyE(i32 poison) #21
221  br label %37
222
; Bit clear: the initializing thread is the one whose tid.x equals
; (ntid.x - 1) & -32, i.e. ntid.x - 1 rounded down to a multiple of 32.
22321:                                               ; preds = %2
224  %22 = tail call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range !82
225  %23 = add nsw i32 %22, -1
226  %24 = and i32 %23, -32
227  %25 = tail call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
228  %26 = icmp eq i32 %25, %24
229  br i1 %26, label %27, label %31
230
; Same setup as block %13, except that IsSPMDMode is set to 0 and the
; smart-stack byte is indexed by this thread's tid.x.
23127:                                               ; preds = %21
232  store i32 0, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62
233  %28 = zext nneg i32 %25 to i64
234  %29 = getelementptr inbounds nuw [1024 x i8], ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE to ptr), i64 512), i64 0, i64 %28
235  %30 = addrspacecast ptr %29 to ptr addrspace(3)
236  store i8 0, ptr addrspace(3) %30, align 1, !tbaa !63
237  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(48) addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i8 noundef 0, i64 noundef 16, i1 noundef false)
238  store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 16) to ptr addrspace(3)), align 8, !tbaa !64
239  store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 20) to ptr addrspace(3)), align 4, !tbaa !69
240  store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 24) to ptr addrspace(3)), align 8, !tbaa !70
241  store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 28) to ptr addrspace(3)), align 4, !tbaa !71
242  store i32 0, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 32) to ptr addrspace(3)), align 8, !tbaa !72
243  store ptr null, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 40) to ptr addrspace(3)), align 8, !tbaa !73
244  store ptr null, ptr addrspace(3) @_ZN4ompx5state12ThreadStatesE, align 8, !tbaa !74
245  store ptr %0, ptr addrspace(3) @_ZL20KernelEnvironmentPtr, align 8, !tbaa !76
246  store ptr %1, ptr addrspace(3) @_ZL26KernelLaunchEnvironmentPtr, align 8, !tbaa !78
247  br label %35
248
24931:                                               ; preds = %21
250  %32 = zext nneg i32 %25 to i64
251  %33 = getelementptr inbounds nuw [1024 x i8], ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE to ptr), i64 512), i64 0, i64 %32
252  %34 = addrspacecast ptr %33 to ptr addrspace(3)
253  store i8 0, ptr addrspace(3) %34, align 1, !tbaa !63
254  br label %35
255
25635:                                               ; preds = %31, %27
257  br i1 %26, label %36, label %37
258
25936:                                               ; preds = %35
260  store ptr null, ptr addrspace(3) @_ZL9ThreadDST, align 8, !tbaa !80
261  br label %37
262
; Join point of both setup paths: bit clear continues at block %100,
; bit set continues with the assertion chain at block %38.
26337:                                               ; preds = %36, %35, %20
264  br i1 %7, label %100, label %38
265
; %43 is true when bit 0 of @__omp_rtl_debug_kind is also set in the first
; i32 of @__omp_rtl_device_environment. When it is, each team-state field is
; compared with the value written during setup and a mismatch calls
; @__assert_fail_internal. The fields at offsets 0, 4, 8, 16 and 20 are
; additionally passed to llvm.assume; the fields at offsets 24, 28 and 32 are
; only checked when %43 is true.
26638:                                               ; preds = %37
267  %39 = load i32, ptr @__omp_rtl_debug_kind, align 4, !tbaa !62
268  %40 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8, !tbaa !83
269  %41 = and i32 %39, 1
270  %42 = and i32 %41, %40
271  %43 = icmp ne i32 %42, 0
272  %44 = load i32, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 8, !tbaa !86
273  %45 = icmp ne i32 %44, 0
274  %46 = select i1 %43, i1 %45, i1 false
275  br i1 %46, label %47, label %48
276
27747:                                               ; preds = %38
278  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(33) @.str747, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 193, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22
279  unreachable
280
28148:                                               ; preds = %38
282  %49 = icmp eq i32 %44, 0
283  tail call void @llvm.assume(i1 noundef %49) #23
284  %50 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 4) to ptr addrspace(3)), align 4, !tbaa !87
285  br i1 %43, label %51, label %54
286
28751:                                               ; preds = %48
288  %52 = icmp eq i32 %50, 0
289  br i1 %52, label %54, label %53
290
29153:                                               ; preds = %51
292  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(27) @.str848, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 194, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22
293  unreachable
294
29554:                                               ; preds = %51, %48
296  %55 = phi i32 [ 0, %51 ], [ %50, %48 ]
297  %56 = icmp eq i32 %55, 0
298  tail call void @llvm.assume(i1 noundef %56) #23
299  %57 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 8) to ptr addrspace(3)), align 8, !tbaa !88
300  br i1 %43, label %58, label %61
301
30258:                                               ; preds = %54
303  %59 = icmp eq i32 %57, 0
304  br i1 %59, label %61, label %60
305
30660:                                               ; preds = %58
307  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(39) @.str949, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 195, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22
308  unreachable
309
31061:                                               ; preds = %58, %54
311  %62 = phi i32 [ 0, %58 ], [ %57, %54 ]
312  %63 = icmp eq i32 %62, 0
313  tail call void @llvm.assume(i1 noundef %63) #23
314  %64 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 16) to ptr addrspace(3)), align 8, !tbaa !89
315  br i1 %43, label %65, label %68
316
31765:                                               ; preds = %61
318  %66 = icmp eq i32 %64, 1
319  br i1 %66, label %68, label %67
320
32167:                                               ; preds = %65
322  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(47) @.str1050, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 196, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22
323  unreachable
324
32568:                                               ; preds = %65, %61
326  %69 = phi i32 [ 1, %65 ], [ %64, %61 ]
327  %70 = icmp eq i32 %69, 1
328  tail call void @llvm.assume(i1 noundef %70) #23
329  %71 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 20) to ptr addrspace(3)), align 4, !tbaa !90
330  br i1 %43, label %72, label %93
331
33272:                                               ; preds = %68
333  %73 = icmp eq i32 %71, 1
334  br i1 %73, label %75, label %74
335
33674:                                               ; preds = %72
337  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(33) @.str1151, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 197, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22
338  unreachable
339
; Only reached from block %72, where %71 == 1 has just been established, so
; the compare below is on constants.
34075:                                               ; preds = %72
341  %76 = icmp eq i32 1, 1
342  tail call void @llvm.assume(i1 noundef %76) #23
343  br i1 %43, label %77, label %95
344
34577:                                               ; preds = %75
346  %78 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 24) to ptr addrspace(3)), align 8, !tbaa !91
347  %79 = icmp eq i32 %78, 1
348  br i1 %79, label %81, label %80
349
35080:                                               ; preds = %77
351  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(43) @.str1252, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 198, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22
352  unreachable
353
35481:                                               ; preds = %77
355  %82 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 28) to ptr addrspace(3)), align 4, !tbaa !71
356  %83 = icmp eq i32 %82, 1
357  br i1 %83, label %85, label %84
358
35984:                                               ; preds = %81
360  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(43) @.str13, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 222, ptr noundef nonnull dereferenceable(64) @__PRETTY_FUNCTION__._ZNK4ompx5state11TeamStateTy11assertEqualERS1_) #22
361  unreachable
362
36385:                                               ; preds = %81
364  %86 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 32) to ptr addrspace(3)), align 8, !tbaa !72
365  %87 = icmp eq i32 %86, 0
366  br i1 %87, label %89, label %88
367
36888:                                               ; preds = %85
369  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(39) @.str14, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 223, ptr noundef nonnull dereferenceable(64) @__PRETTY_FUNCTION__._ZNK4ompx5state11TeamStateTy11assertEqualERS1_) #22
370  unreachable
371
; Last debug-path check: @IsSPMDMode must be non-zero on the bit-set path.
37289:                                               ; preds = %85
373  %90 = load i32, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62
374  %91 = icmp eq i32 %90, 0
375  br i1 %91, label %92, label %98
376
37792:                                               ; preds = %89
378  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(32) @.str23, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 326, ptr noundef nonnull dereferenceable(43) @__PRETTY_FUNCTION__._ZN4ompx5state18assumeInitialStateEb) #22
379  unreachable
380
38193:                                               ; preds = %68
382  %94 = icmp eq i32 %71, 1
383  tail call void @llvm.assume(i1 noundef %94) #23
384  br label %95
385
38695:                                               ; preds = %75, %93
387  %96 = load i32, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62
388  %97 = icmp ne i32 %96, 0
389  br label %98
390
; Assertions done: assume @IsSPMDMode != 0, pass the second barrier and
; return -1 through block %130.
39198:                                               ; preds = %89, %95
392  %99 = phi i1 [ %97, %95 ], [ true, %89 ]
393  tail call void @llvm.assume(i1 noundef %99) #23
394  tail call void @_ZN4ompx11synchronize14threadsAlignedENS_6atomic10OrderingTyE(i32 poison) #21
395  br label %130
396
; Bit clear: the thread with tid.x == (ntid.x - 1) & -32 returns -1 at once.
397100:                                              ; preds = %37
398  %101 = tail call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range !82
399  %102 = add nsw i32 %101, -1
400  %103 = and i32 %102, -32
401  %104 = tail call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !92
402  %105 = icmp eq i32 %104, %103
403  br i1 %105, label %130, label %106
404
; The remaining threads enter the worker loop only when %9 is set and
; tid.x < ntid.x - 32; otherwise they return their tid.x.
405106:                                              ; preds = %100
406  %107 = add nsw i32 %101, -32
407  %108 = icmp ult i32 %104, %107
408  %109 = select i1 %9, i1 %108, i1 false
409  br i1 %109, label %110, label %130
410
; Loop-invariant debug gate, computed the same way as %43.
411110:                                              ; preds = %106
412  %111 = load i32, ptr @__omp_rtl_debug_kind, align 4
413  %112 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8
414  %113 = and i32 %111, 1
415  %114 = and i32 %113, %112
416  %115 = icmp ne i32 %114, 0
417  br label %116
418
; Worker loop: call llvm.nvvm.barrier.sync(8), then ask
; @__kmpc_kernel_parallel for a work function, which it writes to %3.
; A null function pointer ends the loop (block %129). Otherwise, if the call
; returned true, the function is invoked with (0, tid.x) followed by
; @__kmpc_kernel_end_parallel; in both cases the thread calls
; llvm.nvvm.barrier.sync(8) again before the next iteration.
419116:                                              ; preds = %110, %128
420  call void @llvm.lifetime.start.p0(i64 noundef 8, ptr noundef nonnull align 8 dereferenceable(8) %3) #20
421  tail call void @llvm.nvvm.barrier.sync(i32 noundef 8)
422  %117 = call zeroext i1 @__kmpc_kernel_parallel(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) %3) #20
423  %118 = load ptr, ptr %3, align 8, !tbaa !93
424  %119 = icmp eq ptr %118, null
425  br i1 %119, label %129, label %120
426
427120:                                              ; preds = %116
428  br i1 %117, label %121, label %128
429
; With the debug gate on, a non-zero @IsSPMDMode here is an assertion failure.
430121:                                              ; preds = %120
431  %122 = load i32, ptr addrspace(3) @IsSPMDMode, align 4
432  %123 = icmp ne i32 %122, 0
433  %124 = select i1 %115, i1 %123, i1 false
434  br i1 %124, label %125, label %126
435
436125:                                              ; preds = %121
437  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(23) @.str12, ptr noundef null, ptr noundef nonnull dereferenceable(67) @.str15, i32 noundef 60, ptr noundef nonnull dereferenceable(36) @__PRETTY_FUNCTION__._ZL19genericStateMachineP7IdentTy) #22
438  unreachable
439
; Indirect call through the function pointer loaded from %3.
440126:                                              ; preds = %121
441  %127 = icmp eq i32 %122, 0
442  tail call void @llvm.assume(i1 noundef %127) #23
443  tail call void %118(i32 noundef 0, i32 noundef %104) #24
444  tail call void @__kmpc_kernel_end_parallel() #24
445  br label %128
446
447128:                                              ; preds = %126, %120
448  tail call void @llvm.nvvm.barrier.sync(i32 noundef 8)
449  call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %3) #20
450  br label %116, !llvm.loop !94
451
452129:                                              ; preds = %116
453  call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %3) #20
454  br label %130
455
; Return value: -1 when arriving from %98 or %100, the thread's tid.x when
; arriving from %129 or %106.
456130:                                              ; preds = %106, %129, %100, %98
457  %131 = phi i32 [ -1, %98 ], [ -1, %100 ], [ %104, %129 ], [ %104, %106 ]
458  ret i32 %131
459}
460
461; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
462declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #5
463
464; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
465declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #6
466
; Barrier helper: issues a single @llvm.nvvm.barrier0 call and returns.
; The i32 parameter %0 is not referenced by the body. The call site uses
; attribute group #25 ("llvm.assume"="ompx_aligned_barrier").
467; Function Attrs: convergent mustprogress noinline norecurse nounwind
468define internal void @_ZN4ompx11synchronize14threadsAlignedENS_6atomic10OrderingTyE(i32 %0) local_unnamed_addr #7 {
469  tail call void @llvm.nvvm.barrier0() #25
470  ret void
471}
472
473; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
474declare noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #5
475
; Assertion-failure path: prints one message through @_ZN4ompx6printfEPKcz and
; then traps. Never returns (both paths end in llvm.trap + unreachable).
;   %0, %2, %4 - non-null pointers forwarded to the printf call
;   %1         - optional pointer; selects which format string is used
;   %3         - i32 forwarded to the printf call (declared range [60, 905))
476; Function Attrs: cold convergent mustprogress noreturn nounwind
477define internal fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(8) %0, ptr noundef %1, ptr noundef nonnull dereferenceable(66) %2, i32 noundef range(i32 60, 905) %3, ptr noundef nonnull dereferenceable(20) %4) unnamed_addr #8 {
478  %6 = icmp eq ptr %1, null
479  br i1 %6, label %9, label %7
480
  ; %1 is non-null: use format @.str and pass %1 as an extra argument.
4817:                                                ; preds = %5
482  %8 = tail call noundef i32 (ptr, ...) @_ZN4ompx6printfEPKcz(ptr noundef nonnull dereferenceable(40) @.str, ptr noundef nonnull dereferenceable(66) %2, i32 noundef %3, ptr noundef nonnull dereferenceable(20) %4, ptr noundef nonnull %1, ptr noundef nonnull dereferenceable(8) %0) #24
483  br label %11
484
  ; %1 is null: use format @.str1, which takes one argument fewer.
4859:                                                ; preds = %5
486  %10 = tail call noundef i32 (ptr, ...) @_ZN4ompx6printfEPKcz(ptr noundef nonnull dereferenceable(35) @.str1, ptr noundef nonnull dereferenceable(66) %2, i32 noundef %3, ptr noundef nonnull dereferenceable(20) %4, ptr noundef nonnull dereferenceable(8) %0) #24
487  br label %11
488
  ; Common exit: the printf result is discarded and execution traps.
48911:                                               ; preds = %9, %7
490  tail call void @llvm.trap() #26
491  unreachable
492}
493
494; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
495declare void @llvm.assume(i1 noundef) #9
496
497; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
498declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #10
499
500; Function Attrs: convergent nocallback nounwind
501declare void @llvm.nvvm.barrier.sync(i32) #11
502
; Copies the pointer stored 40 bytes into @_ZN4ompx5state9TeamStateE
; (addrspace(3)) out through %0 and returns an i1 derived from the thread index.
;   - Returns false when that pointer is null.
;   - Otherwise returns (tid.x <u N), where N is the i32 at offset 28 of the
;     same struct when it is non-zero, else ntid.x, reduced by 32 when
;     @IsSPMDMode is 0.
; NOTE(review): the stored pointer is presumably the function for the pending
; parallel region; the state-machine loop earlier in this file loads it back
; and calls through it - confirm against the runtime sources.
503; Function Attrs: convergent mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read, argmem: write, inaccessiblemem: none)
504define internal noundef zeroext i1 @__kmpc_kernel_parallel(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) initializes((0, 8)) %0) local_unnamed_addr #12 {
505  %2 = load ptr, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 40) to ptr addrspace(3)), align 8, !tbaa !93
506  store ptr %2, ptr %0, align 8, !tbaa !93
507  %3 = icmp eq ptr %2, null
508  br i1 %3, label %15, label %4
509
  ; Non-null pointer: compute the upper bound for the thread-index comparison.
5104:                                                ; preds = %1
511  %5 = tail call noundef range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x() #27, !range !92
512  %6 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 28) to ptr addrspace(3)), align 4, !tbaa !62
513  %7 = icmp eq i32 %6, 0
514  %8 = tail call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range !82
515  %9 = load i32, ptr addrspace(3) @IsSPMDMode, align 4
516  %10 = icmp eq i32 %9, 0
  ; Fallback bound: ntid.x, minus 32 when @IsSPMDMode is 0.
517  %11 = select i1 %10, i32 -32, i32 0
518  %12 = add nsw i32 %11, %8
  ; Use the fallback only when the i32 at offset 28 is zero.
519  %13 = select i1 %7, i32 %12, i32 %6
520  %14 = icmp ult i32 %5, %13
521  br label %15
522
52315:                                               ; preds = %4, %1
524  %16 = phi i1 [ %14, %4 ], [ false, %1 ]
525  ret i1 %16
526}
527
; Ends a parallel region for the calling thread. Steps, as visible in the body:
;   1. Compute a debug flag %5 = ((@__omp_rtl_debug_kind & 1) &
;      first i32 of @__omp_rtl_device_environment) != 0.
;   2. If %5 is set and @IsSPMDMode is non-zero, report an assertion failure
;      (line argument 299); otherwise assume @IsSPMDMode == 0.
;   3. If @__omp_rtl_assume_no_thread_state is 0 and the i32 at offset 32 of
;      @_ZN4ompx5state9TeamStateE is non-zero, look up this thread's slot in
;      @_ZN4ompx5state12ThreadStatesE; a non-null entry is freed and replaced
;      by the pointer found at offset 32 of that entry.
;   4. Repeat the @IsSPMDMode assertion (line argument 302) on the value
;      re-read after the free, then return.
528; Function Attrs: convergent mustprogress noinline nounwind
529define internal void @__kmpc_kernel_end_parallel() local_unnamed_addr #13 {
530  %1 = load i32, ptr @__omp_rtl_debug_kind, align 4, !tbaa !62
531  %2 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8, !tbaa !83
532  %3 = and i32 %1, 1
533  %4 = and i32 %3, %2
534  %5 = icmp ne i32 %4, 0
535  %6 = load i32, ptr addrspace(3) @IsSPMDMode, align 4
536  %7 = icmp ne i32 %6, 0
  ; Fail only when the debug flag is set AND @IsSPMDMode is non-zero.
537  %8 = select i1 %5, i1 %7, i1 false
538  br i1 %8, label %9, label %10
539
5409:                                                ; preds = %0
541  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(23) @.str12, ptr noundef null, ptr noundef nonnull dereferenceable(72) @.str1027, i32 noundef 299, ptr noundef nonnull dereferenceable(34) @__PRETTY_FUNCTION__.__kmpc_kernel_end_parallel) #22
542  unreachable
543
54410:                                               ; preds = %0
545  %11 = icmp eq i32 %6, 0
546  tail call void @llvm.assume(i1 noundef %11) #23
547  %12 = load i32, ptr @__omp_rtl_assume_no_thread_state, align 4, !tbaa !62
548  %13 = icmp eq i32 %12, 0
549  %14 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 32) to ptr addrspace(3)), align 8
550  %15 = icmp ne i32 %14, 0
  ; Inspect the per-thread table only when both conditions hold.
551  %16 = select i1 %13, i1 %15, i1 false
552  br i1 %16, label %17, label %30
553
  ; Load this thread's entry (indexed by tid.x) from the ThreadStates array.
55417:                                               ; preds = %10
555  %18 = tail call noundef range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x() #27, !range !92
556  %19 = load ptr, ptr addrspace(3) @_ZN4ompx5state12ThreadStatesE, align 8, !tbaa !74
557  %20 = zext nneg i32 %18 to i64
558  %21 = getelementptr inbounds nuw ptr, ptr %19, i64 %20
559  %22 = load ptr, ptr %21, align 8, !tbaa !96
560  %23 = icmp eq ptr %22, null
  ; Branch weights !98 mark the null-entry edge as the expected one.
561  br i1 %23, label %30, label %24, !prof !98
562
  ; Non-null entry: save the pointer at offset 32 of the entry (a
  ; ThreadStateTy pointer per TBAA !99/!100), free the entry, then store the
  ; saved pointer into this thread's slot. The array base is reloaded after
  ; the call to @free.
56324:                                               ; preds = %17
564  %25 = getelementptr inbounds nuw i8, ptr %22, i64 32
565  %26 = load ptr, ptr %25, align 8, !tbaa !99
566  tail call void @free(ptr noundef nonnull dereferenceable(40) %22) #28
567  %27 = load ptr, ptr addrspace(3) @_ZN4ompx5state12ThreadStatesE, align 8, !tbaa !74
568  %28 = getelementptr inbounds nuw ptr, ptr %27, i64 %20
569  store ptr %26, ptr %28, align 8, !tbaa !96
570  %29 = load i32, ptr addrspace(3) @IsSPMDMode, align 4
571  br label %30
572
  ; %31 is @IsSPMDMode as re-read after the free, or the constant 0 on the
  ; paths that did not free (block 10 already assumed the value is 0 there).
57330:                                               ; preds = %10, %17, %24
574  %31 = phi i32 [ 0, %10 ], [ 0, %17 ], [ %29, %24 ]
575  %32 = icmp ne i32 %31, 0
576  %33 = select i1 %5, i1 %32, i1 false
577  br i1 %33, label %34, label %35
578
57934:                                               ; preds = %30
580  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(23) @.str12, ptr noundef null, ptr noundef nonnull dereferenceable(72) @.str1027, i32 noundef 302, ptr noundef nonnull dereferenceable(34) @__PRETTY_FUNCTION__.__kmpc_kernel_end_parallel) #22
581  unreachable
582
58335:                                               ; preds = %30
584  %36 = icmp eq i32 %31, 0
585  tail call void @llvm.assume(i1 noundef %36) #23
586  ret void
587}
588
589; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
590declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #10
591
592; Function Attrs: convergent mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite)
593declare extern_weak void @free(ptr allocptr nocapture noundef) local_unnamed_addr #14
594
; Variadic wrapper around @vprintf.
;   %0  - format pointer, forwarded unchanged
;   ... - variadic arguments, passed on as the pointer produced by va_start
; Returns the i32 result of @vprintf.
; Note: no llvm.va_end call appears in this body.
595; Function Attrs: convergent mustprogress nounwind
596define internal noundef i32 @_ZN4ompx6printfEPKcz(ptr noundef %0, ...) local_unnamed_addr #15 {
  ; %2 is a stack slot that receives the va_list pointer.
597  %2 = alloca ptr, align 8
598  call void @llvm.lifetime.start.p0(i64 noundef 8, ptr noundef nonnull align 8 %2) #29
599  call void @llvm.va_start.p0(ptr noundef nonnull align 8 %2) #27
  ; Load the pointer written by va_start and hand it to vprintf.
600  %3 = load ptr, ptr %2, align 8, !tbaa !101
601  %4 = call i32 @vprintf(ptr noundef %0, ptr noundef %3) #24
602  call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %2) #20
603  ret i32 %4
604}
605
606; Function Attrs: cold noreturn nounwind memory(inaccessiblemem: write)
607declare void @llvm.trap() #16
608
609; Function Attrs: nocallback nofree nosync nounwind willreturn
610declare void @llvm.va_start.p0(ptr) #17
611
612; Function Attrs: convergent nounwind
613declare i32 @vprintf(ptr noundef, ptr noundef) local_unnamed_addr #18
614
615; Function Attrs: convergent nocallback nounwind
616declare void @llvm.nvvm.barrier0() #11
617
; Target-region teardown. Paths visible in the body:
;   - @IsSPMDMode != 0: return immediately.
;   - tid.x == ((ntid.x - 1) & -32): store null into the pointer at offset 40
;     of @_ZN4ompx5state9TeamStateE, then return.
;   - Otherwise, if the first byte of the struct referenced by
;     @_ZL20KernelEnvironmentPtr is 0: call @__kmpc_kernel_parallel once and,
;     when the debug flag is set, report an assertion failure (line argument
;     152) if it produced a non-null pointer.
;   - Otherwise: return.
; NOTE(review): (ntid.x - 1) & -32 rounds the last thread index down to a
; multiple of 32; presumably this selects the main thread in generic mode -
; confirm against the runtime sources.
618; Function Attrs: convergent mustprogress nounwind
619define internal void @__kmpc_target_deinit() #4 {
  ; %1 receives the pointer written by @__kmpc_kernel_parallel in block 15.
620  %1 = alloca ptr, align 8
621  %2 = load i32, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62
622  %3 = icmp eq i32 %2, 0
623  br i1 %3, label %4, label %27
624
6254:                                                ; preds = %0
626  %5 = tail call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range !82
627  %6 = add nsw i32 %5, -1
628  %7 = and i32 %6, -32
629  %8 = tail call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !92
630  %9 = icmp eq i32 %8, %7
631  br i1 %9, label %10, label %11
632
  ; Clears the same TeamState slot (offset 40) that @__kmpc_kernel_parallel reads.
63310:                                               ; preds = %4
634  store ptr null, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 40) to ptr addrspace(3)), align 8, !tbaa !93
635  br label %27
636
  ; Continue only when the first byte of the kernel environment is zero.
63711:                                               ; preds = %4
638  %12 = load ptr, ptr addrspace(3) @_ZL20KernelEnvironmentPtr, align 8, !tbaa !76
639  %13 = load i8, ptr %12, align 8, !tbaa !103
640  %14 = icmp eq i8 %13, 0
641  br i1 %14, label %15, label %27
642
  ; The i1 result %16 is unused; only the pointer written to %1 is examined.
64315:                                               ; preds = %11
644  call void @llvm.lifetime.start.p0(i64 noundef 8, ptr noundef nonnull align 8 dereferenceable(8) %1) #29
645  %16 = call zeroext i1 @__kmpc_kernel_parallel(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) %1) #20
646  %17 = load i32, ptr @__omp_rtl_debug_kind, align 4, !tbaa !62
647  %18 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8, !tbaa !83
648  %19 = and i32 %17, 1
649  %20 = and i32 %19, %18
650  %21 = icmp eq i32 %20, 0
651  %22 = load ptr, ptr %1, align 8
652  %23 = icmp eq ptr %22, null
  ; Pass when the debug flag is clear OR the returned pointer is null.
653  %24 = select i1 %21, i1 true, i1 %23
654  br i1 %24, label %26, label %25
655
65625:                                               ; preds = %15
657  tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(18) @.str2, ptr noundef null, ptr noundef nonnull dereferenceable(67) @.str15, i32 noundef 152, ptr noundef nonnull dereferenceable(28) @__PRETTY_FUNCTION__.__kmpc_target_deinit) #22
658  unreachable
659
66026:                                               ; preds = %15
661  tail call void @llvm.assume(i1 noundef %23) #23
662  call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %1) #20
663  br label %27
664
66527:                                               ; preds = %26, %11, %10, %0
666  ret void
667}
668
; Attribute groups. Each #N is referenced by the function definitions,
; declarations, and call sites above. Groups with "target-cpu"="sm_70" carry
; per-function target settings; the short groups at the end (#19-#29) are
; used at call sites.
669attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx83,+sm_70" }
670attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "frame-pointer"="all" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="128" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx83,+sm_70" }
671attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx83,+sm_70" }
672attributes #3 = { convergent noinline nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx83,+sm_70" }
673attributes #4 = { convergent mustprogress nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" }
674attributes #5 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
675attributes #6 = { nocallback nofree nounwind willreturn memory(argmem: write) }
676attributes #7 = { convergent mustprogress noinline norecurse nounwind "frame-pointer"="all" "llvm.assume"="ompx_aligned_barrier" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" }
677attributes #8 = { cold convergent mustprogress noreturn nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" }
678attributes #9 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
679attributes #10 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
680attributes #11 = { convergent nocallback nounwind }
681attributes #12 = { convergent mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read, argmem: write, inaccessiblemem: none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" }
682attributes #13 = { convergent mustprogress noinline nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" }
683attributes #14 = { convergent mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" }
684attributes #15 = { convergent mustprogress nounwind "frame-pointer"="all" "no-builtin-printf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" }
685attributes #16 = { cold noreturn nounwind memory(inaccessiblemem: write) }
686attributes #17 = { nocallback nofree nosync nounwind willreturn }
687attributes #18 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" }
688attributes #19 = { convergent }
689attributes #20 = { nounwind }
690attributes #21 = { convergent nounwind "llvm.assume"="ompx_aligned_barrier" }
691attributes #22 = { convergent noreturn nounwind }
692attributes #23 = { memory(write) }
693attributes #24 = { convergent nounwind }
694attributes #25 = { "llvm.assume"="ompx_aligned_barrier" }
695attributes #26 = { noreturn }
696attributes #27 = { nofree willreturn }
697attributes #28 = { convergent nounwind willreturn }
698attributes #29 = { nofree nounwind willreturn }
699
; Named metadata: module-level lists that point at the numbered nodes below
; (module flags, debug compile unit, NVVM annotations, OpenMP offload info,
; producer ident strings, NVVM IR version).
700!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10}
701!llvm.dbg.cu = !{!11}
702!nvvm.annotations = !{!13}
703!omp_offload.info = !{!14}
704!llvm.ident = !{!15, !16, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15}
705!nvvmir.version = !{!17}
706
; Module flags !0-!10, as listed by !llvm.module.flags. Each node is
; {merge behavior, flag name, value}.
707!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 12, i32 3]}
708!1 = !{i32 7, !"Dwarf Version", i32 2}
709!2 = !{i32 2, !"Debug Info Version", i32 3}
710!3 = !{i32 1, !"wchar_size", i32 4}
711!4 = !{i32 4, !"nvvm-reflect-ftz", i32 0}
712!5 = !{i32 7, !"openmp", i32 51}
713!6 = !{i32 7, !"openmp-device", i32 51}
714!7 = !{i32 8, !"PIC Level", i32 2}
715!8 = !{i32 7, !"frame-pointer", i32 2}
716!9 = !{i32 1, !"ThinLTO", i32 0}
717!10 = !{i32 1, !"EnableSplitLTOUnit", i32 1}
; Debug-info compile unit (!11) and its source file test.c (!12).
718!11 = distinct !DICompileUnit(language: DW_LANG_C11, file: !12, producer: "clang version 20.0.0git (/tmp/llvm/clang b9447c03a9ef2eed55b685a33511df86f7f94e89)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
719!12 = !DIFile(filename: "test.c", directory: "/tmp")
; !13: NVVM annotation (maxntidx = 128) on @__omp_offloading_fd02_1116d6_h_l12.
; !14: the single entry of !omp_offload.info. !15/!16: ident strings.
; !17: the node listed by !nvvmir.version.
720!13 = !{ptr @__omp_offloading_fd02_1116d6_h_l12, !"maxntidx", i32 128}
721!14 = !{i32 0, i32 64770, i32 1119958, !"h", i32 12, i32 0, i32 0}
722!15 = !{!"clang version 20.0.0git (/tmp/llvm/clang b9447c03a9ef2eed55b685a33511df86f7f94e89)"}
723!16 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
724!17 = !{i32 2, i32 0}
; Debug info for the artificial subprogram
; __omp_offloading_fd02_1116d6_h_l12_debug__ (test.c line 13): its type,
; its locals 'dyn_ptr', 'i' and 'a', and the source locations used in it.
725!18 = distinct !DISubprogram(name: "__omp_offloading_fd02_1116d6_h_l12_debug__", scope: !12, file: !12, line: 13, type: !19, scopeLine: 13, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !11, retainedNodes: !24)
726!19 = !DISubroutineType(types: !20)
727!20 = !{null, !21}
728!21 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !22)
729!22 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !23)
730!23 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
731!24 = !{}
732!25 = !DILocalVariable(name: "dyn_ptr", arg: 1, scope: !18, type: !21, flags: DIFlagArtificial)
733!26 = !DILocation(line: 0, scope: !18)
734!27 = !DILocation(line: 13, column: 3, scope: !18)
735!28 = !DILocalVariable(name: "i", scope: !29, file: !12, line: 14, type: !30)
736!29 = distinct !DILexicalBlock(scope: !18, file: !12, line: 13, column: 3)
737!30 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
738!31 = !DILocation(line: 14, column: 9, scope: !29)
739!32 = !DILocalVariable(name: "a", scope: !29, file: !12, line: 15, type: !33)
740!33 = !DICompositeType(tag: DW_TAG_array_type, baseType: !30, size: 64, elements: !34)
741!34 = !{!35}
742!35 = !DISubrange(count: 2)
743!36 = !DILocation(line: 15, column: 9, scope: !29)
744!37 = !DILocation(line: 16, column: 5, scope: !29)
745!38 = !DILocation(line: 17, column: 5, scope: !29)
746!39 = !DILocation(line: 18, column: 3, scope: !29)
747!40 = !DILocation(line: 18, column: 3, scope: !18)
; Debug info for the artificial subprogram __omp_offloading_fd02_1116d6_h_l12
; (test.c line 12).
748!41 = distinct !DISubprogram(name: "__omp_offloading_fd02_1116d6_h_l12", scope: !12, file: !12, line: 12, type: !19, scopeLine: 12, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !11, retainedNodes: !24)
749!42 = !DILocalVariable(name: "dyn_ptr", arg: 1, scope: !41, type: !21, flags: DIFlagArtificial)
750!43 = !DILocation(line: 0, scope: !41)
751!44 = !DILocation(line: 12, column: 1, scope: !41)
; Debug info for subprogram 'g' (test.c line 3) and its locals 'i' and 'a'.
752!45 = distinct !DISubprogram(name: "g", scope: !12, file: !12, line: 3, type: !46, scopeLine: 3, spFlags: DISPFlagDefinition, unit: !11, retainedNodes: !24)
753!46 = !DISubroutineType(types: !47)
754!47 = !{null}
755!48 = !DILocalVariable(name: "i", scope: !45, file: !12, line: 4, type: !30)
756!49 = !DILocation(line: 4, column: 7, scope: !45)
757!50 = !DILocalVariable(name: "a", scope: !45, file: !12, line: 5, type: !33)
758!51 = !DILocation(line: 5, column: 7, scope: !45)
759!52 = !DILocation(line: 6, column: 3, scope: !45)
760!53 = !DILocation(line: 7, column: 3, scope: !45)
761!54 = !DILocation(line: 8, column: 1, scope: !45)
; Remaining nodes, referenced from instructions above:
;   - TBAA: !58 is the root ("Simple C++ TBAA"); nodes whose first operand is
;     a string are type descriptors, and the three-operand nodes
;     {base type, access type, offset} are access tags used as !tbaa operands.
;   - !82 and !92 are !range operands: [1, 1025) and [0, 1024).
;   - !94/!95 are loop metadata (llvm.loop.mustprogress).
;   - !98 is branch-weight metadata used as a !prof operand.
762!55 = !{!56, !59, i64 2}
763!56 = !{!"_ZTS26ConfigurationEnvironmentTy", !57, i64 0, !57, i64 1, !59, i64 2, !60, i64 4, !60, i64 8, !60, i64 12, !60, i64 16, !60, i64 20, !60, i64 24}
764!57 = !{!"omnipotent char", !58, i64 0}
765!58 = !{!"Simple C++ TBAA"}
766!59 = !{!"_ZTSN4llvm3omp19OMPTgtExecModeFlagsE", !57, i64 0}
767!60 = !{!"int", !57, i64 0}
768!61 = !{!56, !57, i64 0}
769!62 = !{!60, !60, i64 0}
770!63 = !{!57, !57, i64 0}
771!64 = !{!65, !60, i64 16}
772!65 = !{!"_ZTSN4ompx5state11TeamStateTyE", !66, i64 0, !60, i64 28, !60, i64 32, !67, i64 40}
773!66 = !{!"_ZTSN4ompx5state10ICVStateTyE", !60, i64 0, !60, i64 4, !60, i64 8, !60, i64 12, !60, i64 16, !60, i64 20, !60, i64 24}
774!67 = !{!"p1 void", !68, i64 0}
775!68 = !{!"any pointer", !57, i64 0}
776!69 = !{!65, !60, i64 20}
777!70 = !{!65, !60, i64 24}
778!71 = !{!65, !60, i64 28}
779!72 = !{!65, !60, i64 32}
780!73 = !{!65, !67, i64 40}
781!74 = !{!75, !75, i64 0}
782!75 = !{!"p2 _ZTSN4ompx5state13ThreadStateTyE", !68, i64 0}
783!76 = !{!77, !77, i64 0}
784!77 = !{!"p1 _ZTS19KernelEnvironmentTy", !68, i64 0}
785!78 = !{!79, !79, i64 0}
786!79 = !{!"p1 _ZTS25KernelLaunchEnvironmentTy", !68, i64 0}
787!80 = !{!81, !81, i64 0}
788!81 = !{!"p2 _ZTS22DynamicScheduleTracker", !68, i64 0}
789!82 = !{i32 1, i32 1025}
790!83 = !{!84, !60, i64 0}
791!84 = !{!"_ZTS19DeviceEnvironmentTy", !60, i64 0, !60, i64 4, !60, i64 8, !60, i64 12, !85, i64 16, !85, i64 24, !85, i64 32, !85, i64 40}
792!85 = !{!"long", !57, i64 0}
793!86 = !{!66, !60, i64 0}
794!87 = !{!66, !60, i64 4}
795!88 = !{!66, !60, i64 8}
796!89 = !{!66, !60, i64 16}
797!90 = !{!66, !60, i64 20}
798!91 = !{!66, !60, i64 24}
799!92 = !{i32 0, i32 1024}
800!93 = !{!67, !67, i64 0}
801!94 = distinct !{!94, !95}
802!95 = !{!"llvm.loop.mustprogress"}
803!96 = !{!97, !97, i64 0}
804!97 = !{!"p1 _ZTSN4ompx5state13ThreadStateTyE", !68, i64 0}
805!98 = !{!"branch_weights", !"expected", i32 2000, i32 1}
806!99 = !{!100, !97, i64 32}
807!100 = !{!"_ZTSN4ompx5state13ThreadStateTyE", !66, i64 0, !97, i64 32}
808!101 = !{!102, !102, i64 0}
809!102 = !{!"p1 omnipotent char", !68, i64 0}
810!103 = !{!104, !57, i64 0}
811!104 = !{!"_ZTS19KernelEnvironmentTy", !56, i64 0, !105, i64 32, !106, i64 40}
812!105 = !{!"p1 _ZTS7IdentTy", !68, i64 0}
813!106 = !{!"p1 _ZTS20DynamicEnvironmentTy", !68, i64 0}
814