; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
;
; Verify we change it to SPMD mode but also avoid propagating the old mode (=generic) into the __kmpc_target_init function.
;
; Expected output, in order:
;   1. the kernel environment constant with its leading three i8 fields equal to 0, 0, 3;
;   2. no store of 0 to @IsSPMDMode before the store of 1;
;   3. the store of 1 to @IsSPMDMode;
;   4. no store of 0 to @IsSPMDMode anywhere after it.
; (The negative directive before the positive store used to be listed twice; one copy is sufficient.)
;
; CHECK: @__omp_offloading_20_11e3950_main_l12_kernel_environment = local_unnamed_addr addrspace(1) constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr addrspacecast (ptr addrspace(1) @1 to ptr), ptr null }
; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode
; CHECK: store i32 1, ptr addrspace(3) @IsSPMDMode
; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode
;
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
target triple = "amdgcn-amd-amdhsa"

; Named struct types used by the globals and functions of this module.
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.DeviceEnvironmentTy = type { i32, i32, i32, i32 }
; The kernel environment embeds a configuration struct followed by two pointers.
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
; Three leading i8 fields followed by six i32 fields; @__kmpc_target_init reads
; field 0 (as %UseGenericStateMachine) and field 2 (as %Mode).
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
%"struct.(anonymous namespace)::SharedMemorySmartStackTy" = type { [512 x i8], [1024 x i8] }
%"struct.(anonymous namespace)::TeamStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", i32, ptr }
%"struct.(anonymous namespace)::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 }

@__omp_rtl_assume_teams_oversubscription = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
@__omp_rtl_assume_threads_oversubscription = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
; @0 is the location string referenced by the ident_t constant @1.
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr addrspace(1) constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
; Kernel execution-mode byte (initially 1); kept alive through @llvm.compiler.used below.
@__omp_offloading_20_11e3950_main_l12_exec_mode = weak addrspace(1) constant i8 1
@__omp_rtl_debug_kind = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
@__omp_rtl_assume_no_thread_state = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
@omptarget_device_environment = weak protected addrspace(4) global %struct.DeviceEnvironmentTy undef, align 4
; Kernel environment passed to @__kmpc_target_init by the kernel. Its configuration
; starts as { i8 1, i8 0, i8 1, ... }; the FileCheck directives at the top of the file
; expect the pass to emit { i8 0, i8 0, i8 3, ... } instead.
@__omp_offloading_20_11e3950_main_l12_kernel_environment = local_unnamed_addr addrspace(1) constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr addrspacecast (ptr addrspace(1) @1 to ptr), ptr null }
; Flag written by @__kmpc_target_init (1 on one path, 0 on the other) and loaded by the
; outlined functions; the test's directives are about the stores to this global.
@IsSPMDMode = weak hidden addrspace(3) global i32 undef, align 4
@.str.12 = private unnamed_addr addrspace(4) constant [47 x i8] c"ValueRAII initialization with wrong old value!\00", align 1
; Addrspace(3) state initialised by @__kmpc_target_init.
@_ZN12_GLOBAL__N_122SharedMemorySmartStackE = internal addrspace(3) global %"struct.(anonymous namespace)::SharedMemorySmartStackTy" undef, align 16
@_ZN12_GLOBAL__N_19TeamStateE = internal unnamed_addr addrspace(3) global %"struct.(anonymous namespace)::TeamStateTy" undef, align 8
@_ZN12_GLOBAL__N_112ThreadStatesE = internal unnamed_addr addrspace(3) global [1024 x ptr] undef, align 16
@.str.12.47 = private unnamed_addr addrspace(4) constant [49 x i8] c"Thread state modified while explicitly disabled!\00", align 1
@_ZL29SharedMemVariableSharingSpace = internal unnamed_addr addrspace(3) global [64 x ptr] undef, align 16
; Destination of the stores in @__omp_outlined__ and @__omp_outlined___wrapper.
@G = global i32 undef
; Keep @IsSPMDMode and the device environment from being removed.
@llvm.used = appending addrspace(1) global [2 x ptr] [ptr addrspacecast (ptr addrspace(3) @IsSPMDMode to ptr), ptr addrspacecast (ptr addrspace(4) @omptarget_device_environment to ptr)], section "llvm.metadata"
@llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__omp_offloading_20_11e3950_main_l12_exec_mode to ptr)], section "llvm.metadata"

; Function Attrs: alwaysinline convergent norecurse nounwind
; Kernel entry point. Calls @__kmpc_target_init with the kernel environment; only when
; that call returns -1 does it run the user code, which consists of a single
; __kmpc_parallel_51 call followed by __kmpc_target_deinit.
define weak_odr amdgpu_kernel void @__omp_offloading_20_11e3950_main_l12(ptr %dyn, i64 noundef %nxyz, i64 noundef %ng, ptr noundef nonnull align 8 dereferenceable(8) %aa) local_unnamed_addr #0 {
entry:
  %ng1 = alloca i32, align 4
  %captured_vars_addrs = alloca [2 x ptr], align 8, addrspace(5)
  %0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_20_11e3950_main_l12_kernel_environment to ptr), ptr %dyn)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %common.ret

user_code.entry:                                  ; preds = %entry
  %captured_vars_addrs.ascast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
  ; Only the first of the two slots is populated before the call.
  ; NOTE(review): the call below passes i64 2 as %nargs -- confirm this is intentional for the reduced test.
  store ptr %ng1, ptr addrspace(5) %captured_vars_addrs, align 8, !tbaa !7
  call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i32 0, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs.ascast, i64 2)
  call void @__kmpc_target_deinit()
  br label %common.ret

common.ret:                                       ; preds = %user_code.entry, %entry
  ret void
}

; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture) #1

; Outlined parallel-region body: copies the current value of @IsSPMDMode into @G.
; NOTE(review): attribute group #2 contains readnone although the body loads and
; stores globals -- confirm this mismatch is intended for the test.
; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn
define internal void @__omp_outlined__(ptr noalias nocapture %.global_tid., ptr noalias nocapture %.bound_tid., ptr nocapture nonnull align 4 %ng, ptr nocapture nonnull align 8 %aa) #2 {
entry:
  %isspmd = load i32, ptr addrspace(3) @IsSPMDMode
  store i32 %isspmd, ptr @G
  ret void
}

; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) #1

; Wrapper passed as %wrapper_fn to __kmpc_parallel_51; like @__omp_outlined__ it only
; copies @IsSPMDMode into @G and ignores both of its arguments.
; NOTE(review): attribute group #3 also contains readnone -- same question as above.
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 noundef %1) #3 {
entry:
  %isspmd = load i32, ptr addrspace(3) @IsSPMDMode
  store i32 %isspmd, ptr @G
  ret void
}

; Function Attrs: nounwind readnone speculatable willreturn
declare i32 @llvm.amdgcn.workitem.id.x() #4

; Function Attrs: nounwind readnone speculatable willreturn
declare i32 @llvm.amdgcn.workgroup.id.x() #4

; Function Attrs: nounwind readnone speculatable willreturn
declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #4

; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare i32 @llvm.umin.i32(i32, i32) #5

; Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind willreturn
declare void @llvm.assume(i1 noundef) #6

; Function Attrs: convergent nounwind willreturn
declare void @llvm.amdgcn.s.barrier() #7

; Thin helper that issues a single llvm.amdgcn.s.barrier call.
; Function Attrs: convergent mustprogress noinline nounwind willreturn
define internal fastcc void @_ZN4ompx11synchronize14threadsAlignedEv() unnamed_addr #8 {
entry:
  call void @llvm.amdgcn.s.barrier() #13
  ret void
}

; Device-side kernel initialisation.
;   %KernelEnvironment - pointer to the kernel environment; byte field 2 of its
;                        configuration is loaded as %Mode and byte field 0 as
;                        %UseGenericStateMachine.
;   %dyn               - not used in this body.
; Returns -1 on the paths coming from the block-size-assumption block and from the
; "initial thread" test when it succeeds; otherwise returns the work-item id (%25).
; A previous, commented-out signature at this spot took %Ident, i8 %Mode and
; i1 %UseGenericStateMachine as direct arguments instead of %KernelEnvironment.
; Function Attrs: convergent nounwind
define internal i32 @__kmpc_target_init(ptr nofree noundef nonnull align 8 dereferenceable(24) %KernelEnvironment, ptr %dyn) local_unnamed_addr #9 {
entry:
  ; %0 has no users in this function.
  %0 = and i32 undef, undef
  %ExecMode = getelementptr inbounds %struct.ConfigurationEnvironmentTy, ptr %KernelEnvironment, i64 0, i32 2
  ; NOTE(review): this i8 load is tagged with !28, which describes an int member of TeamStateTy -- confirm.
  %Mode = load i8, ptr %ExecMode, align 2, !tbaa !28
  ; Branch on bit 1 (mask 2) of the mode byte: set -> if.then, clear -> if.else.
  %1 = and i8 %Mode, 2
  %tobool.not = icmp eq i8 %1, 0
  br i1 %tobool.not, label %if.else, label %if.then

; Path taken when the mode bit is set; work-item 0 stores 1 to @IsSPMDMode.
if.then:                                          ; preds = %entry
  %2 = call i32 @llvm.amdgcn.workitem.id.x() #13, !range !11
  %3 = call i32 @llvm.amdgcn.workgroup.id.x() #13
  %4 = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #13
  %5 = getelementptr i8, ptr addrspace(4) %4, i64 12
  %6 = load i32, ptr addrspace(4) %5, align 4, !invariant.load !12
  %7 = getelementptr i8, ptr addrspace(4) %4, i64 4
  %8 = load i16, ptr addrspace(4) %7, align 4, !range !13, !invariant.load !12
  %conv.i.i7.i.i.i = zext i16 %8 to i32
  %mul.i.i8.i.i.i = mul i32 %3, %conv.i.i7.i.i.i
  %sub.i.i9.i.i.i = sub i32 %6, %mul.i.i8.i.i.i
  ; %9 = umin(value at dispatch offset 12 - workgroup id * value at offset 4, value at offset 4)
  %9 = call i32 @llvm.umin.i32(i32 %sub.i.i9.i.i.i, i32 %conv.i.i7.i.i.i) #13
  %cmp4.i.i.i = icmp ult i32 %2, %9
  call void @llvm.assume(i1 %cmp4.i.i.i) #13
  %cmp.i.i8 = icmp eq i32 %2, 0
  br i1 %cmp.i.i8, label %if.then.i, label %_ZN4ompx5state4initEb.exit.critedge

; Work-item 0 only: set @IsSPMDMode to 1 and initialise the team state fields.
if.then.i:                                        ; preds = %if.then
  store i32 1, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !14
  store i8 0, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::SharedMemorySmartStackTy", ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE, i32 0, i32 1, i32 0), align 16, !tbaa !18
  store i32 %9, ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, align 8, !tbaa !19
  store i32 0, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 1), align 4, !tbaa !23
  store i32 0, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 2), align 8, !tbaa !24
  store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 3), align 4, !tbaa !25
  store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 4), align 8, !tbaa !26
  store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 5), align 4, !tbaa !27
  store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 1), align 8, !tbaa !28
  store ptr null, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 2), align 8, !tbaa !29
  br label %_ZN4ompx5state4initEb.exit

; Every other work-item: zero its own byte in the second array of the smart stack.
_ZN4ompx5state4initEb.exit.critedge:              ; preds = %if.then
  %arrayidx.i.i.c = getelementptr inbounds [1024 x i8], ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::SharedMemorySmartStackTy", ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE, i32 0, i32 1, i32 0), i32 0, i32 %2
  store i8 0, ptr addrspace(3) %arrayidx.i.i.c, align 1, !tbaa !18
  br label %_ZN4ompx5state4initEb.exit

; All work-items of this path: clear the per-thread state slot, then hit the barrier helper.
_ZN4ompx5state4initEb.exit:                       ; preds = %_ZN4ompx5state4initEb.exit.critedge, %if.then.i
  %arrayidx.i = getelementptr inbounds [1024 x ptr], ptr addrspace(3) @_ZN12_GLOBAL__N_112ThreadStatesE, i32 0, i32 %2
  store ptr null, ptr addrspace(3) %arrayidx.i, align 8, !tbaa !30
  call fastcc void @_ZN4ompx11synchronize14threadsAlignedEv() #14
  br label %if.end

; Path taken when the mode bit is clear; this is the path containing the store of 0
; to @IsSPMDMode that the test expects to be absent from the output.
if.else:                                          ; preds = %entry
  %10 = call i32 @llvm.amdgcn.workgroup.id.x() #13
  %11 = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #13
  %12 = getelementptr i8, ptr addrspace(4) %11, i64 12
  %13 = load i32, ptr addrspace(4) %12, align 4, !invariant.load !12
  %14 = getelementptr i8, ptr addrspace(4) %11, i64 4
  %15 = load i16, ptr addrspace(4) %14, align 4, !range !13, !invariant.load !12
  %conv.i.i.i.i.i.i = zext i16 %15 to i32
  %mul.i.i.i.i.i.i = mul i32 %10, %conv.i.i.i.i.i.i
  %sub.i.i.i.i.i.i = sub i32 %13, %mul.i.i.i.i.i.i
  %16 = call i32 @llvm.umin.i32(i32 %sub.i.i.i.i.i.i, i32 %conv.i.i.i.i.i.i) #13
  %17 = call i32 @llvm.amdgcn.workitem.id.x() #13
  %cmp.i.i.i.i26 = icmp ult i32 %17, %16
  call void @llvm.assume(i1 %cmp.i.i.i.i26) #13
  ; Select the work-item whose id equals (%16 - 1) & -64.
  %sub.i.i.i27 = add nsw i32 %16, -1
  %and.i.i.i28 = and i32 %sub.i.i.i27, -64
  %cmp.i2.i.i29 = icmp eq i32 %17, %and.i.i.i28
  br i1 %cmp.i2.i.i29, label %if.then.i30, label %_ZN4ompx5state4initEb.exit55.critedge

; Selected work-item only: set @IsSPMDMode to 0 and initialise the team state,
; with the first team-state field set to %16 - 64.
if.then.i30:                                      ; preds = %if.else
  store i32 0, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !14
  %arrayidx.i.i46 = getelementptr inbounds [1024 x i8], ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::SharedMemorySmartStackTy", ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE, i32 0, i32 1, i32 0), i32 0, i32 %17
  store i8 0, ptr addrspace(3) %arrayidx.i.i46, align 1, !tbaa !18
  %sub.i.i = add nsw i32 %16, -64
  store i32 %sub.i.i, ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, align 8, !tbaa !19
  store i32 0, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 1), align 4, !tbaa !23
  store i32 0, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 2), align 8, !tbaa !24
  store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 3), align 4, !tbaa !25
  store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 4), align 8, !tbaa !26
  store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 5), align 4, !tbaa !27
  store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 1), align 8, !tbaa !28
  store ptr null, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 2), align 8, !tbaa !29
  br label %_ZN4ompx5state4initEb.exit55

_ZN4ompx5state4initEb.exit55.critedge:            ; preds = %if.else
  %arrayidx.i.i46.c = getelementptr inbounds [1024 x i8], ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::SharedMemorySmartStackTy", ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE, i32 0, i32 1, i32 0), i32 0, i32 %17
  store i8 0, ptr addrspace(3) %arrayidx.i.i46.c, align 1, !tbaa !18
  br label %_ZN4ompx5state4initEb.exit55

; Unlike the other path, no barrier helper is called here before joining if.end.
_ZN4ompx5state4initEb.exit55:                     ; preds = %_ZN4ompx5state4initEb.exit55.critedge, %if.then.i30
  %arrayidx.i53 = getelementptr inbounds [1024 x ptr], ptr addrspace(3) @_ZN12_GLOBAL__N_112ThreadStatesE, i32 0, i32 %17
  store ptr null, ptr addrspace(3) %arrayidx.i53, align 8, !tbaa !30
  br label %if.end

; Join point: recompute the block-size value (%24) and work-item id (%25), then
; branch again on the same mode bit tested in the entry block.
if.end:                                           ; preds = %_ZN4ompx5state4initEb.exit55, %_ZN4ompx5state4initEb.exit
  %18 = call i32 @llvm.amdgcn.workgroup.id.x() #13
  %19 = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #13
  %20 = getelementptr i8, ptr addrspace(4) %19, i64 12
  %21 = load i32, ptr addrspace(4) %20, align 4, !invariant.load !12
  %22 = getelementptr i8, ptr addrspace(4) %19, i64 4
  %23 = load i16, ptr addrspace(4) %22, align 4, !range !13, !invariant.load !12
  %conv.i.i.i.i.i73 = zext i16 %23 to i32
  %mul.i.i.i.i.i74 = mul i32 %18, %conv.i.i.i.i.i73
  %sub.i.i.i.i.i75 = sub i32 %21, %mul.i.i.i.i.i74
  %24 = call i32 @llvm.umin.i32(i32 %sub.i.i.i.i.i75, i32 %conv.i.i.i.i.i73) #13
  %25 = call i32 @llvm.amdgcn.workitem.id.x() #13
  %cmp.i.i.i79 = icmp ult i32 %25, %24
  call void @llvm.assume(i1 %cmp.i.i.i79) #13
  br i1 %tobool.not, label %_ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit, label %_ZN4ompx7mapping12getBlockSizeEb.exit.i64

; Mode bit set: feed llvm.assume with the values the if.then.i block stored
; (team-state fields and a non-zero @IsSPMDMode), then return -1.
_ZN4ompx7mapping12getBlockSizeEb.exit.i64:        ; preds = %if.end
  %26 = load i32, ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, align 8
  %cmp.i.i.i63 = icmp eq i32 %24, %26
  call void @llvm.assume(i1 %cmp.i.i.i63) #13
  %27 = load i32, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 1), align 4
  %cmp9.i.i.i = icmp eq i32 %27, 0
  call void @llvm.assume(i1 %cmp9.i.i.i) #13
  %28 = load i32, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 2), align 8
  %cmp19.i.i.i = icmp eq i32 %28, 0
  call void @llvm.assume(i1 %cmp19.i.i.i) #13
  %29 = load i32, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 3), align 4
  %cmp29.i.i.i = icmp eq i32 %29, 1
  call void @llvm.assume(i1 %cmp29.i.i.i) #13
  %30 = load i32, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 4), align 8
  %cmp39.i.i.i = icmp eq i32 %30, 1
  call void @llvm.assume(i1 %cmp39.i.i.i) #13
  %31 = load i32, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 5), align 4
  %cmp49.i.i.i = icmp eq i32 %31, 1
  call void @llvm.assume(i1 %cmp49.i.i.i) #13
  %32 = load i32, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 1), align 8
  %cmp.i.i67 = icmp eq i32 %32, 1
  call void @llvm.assume(i1 %cmp.i.i67) #13
  %33 = load i32, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !14
  %tobool.i59.i = icmp ne i32 %33, 0
  call void @llvm.assume(i1 %tobool.i59.i) #13
  br label %_ZN14DebugEntryRAIID2Ev.exit250

; Mode bit clear: the work-item with id (%24 - 1) & -64 returns -1 directly.
_ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit: ; preds = %if.end
  %sub.i.i83 = add nsw i32 %24, -1
  %and.i.i84 = and i32 %sub.i.i83, -64
  %cmp.i2.i = icmp eq i32 %25, %and.i.i84
  br i1 %cmp.i2.i, label %_ZN14DebugEntryRAIID2Ev.exit250, label %if.end10

; Remaining work-items: reach the barrier only when byte 0 of the kernel environment
; is non-zero and the work-item id is below %24 - 64.
if.end10:                                         ; preds = %_ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit
  %sub.i = add nsw i32 %24, -64
  %cmp = icmp ult i32 %25, %sub.i
  %34 = load i8, ptr %KernelEnvironment, align 8
  %UseGenericStateMachine = icmp ne i8 %34, 0
  %or.cond251 = select i1 %UseGenericStateMachine, i1 %cmp, i1 false
  br i1 %or.cond251, label %do.body.i, label %_ZN14DebugEntryRAIID2Ev.exit250

do.body.i:                                        ; preds = %if.end10
  call void @llvm.amdgcn.s.barrier() #13
  br label %_ZN14DebugEntryRAIID2Ev.exit250

; Common exit: -1 from the first two predecessors, the work-item id from the other two.
_ZN14DebugEntryRAIID2Ev.exit250:                  ; preds = %do.body.i, %if.end10, %_ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit, %_ZN4ompx7mapping12getBlockSizeEb.exit.i64
  %retval.0 = phi i32 [ -1, %_ZN4ompx7mapping12getBlockSizeEb.exit.i64 ], [ -1, %_ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit ], [ %25, %do.body.i ], [ %25, %if.end10 ]
  ret i32 %retval.0
}

; Empty deinitialisation stub called at the end of the kernel's user code.
; Function Attrs: nounwind
define internal void @__kmpc_target_deinit() local_unnamed_addr #10 {
  ret void
}

; Declared without a body in this module; called once from the kernel.
; Function Attrs: convergent nounwind
declare void @__kmpc_parallel_51(ptr nocapture noundef readnone %ident, i32 noundef %0, i32 noundef %if_expr, i32 noundef %num_threads, i32 noundef %proc_bind, ptr noundef %fn, ptr noundef %wrapper_fn, ptr noundef %args, i64 noundef %nargs)

; Function Attrs: argmemonly nofree nounwind willreturn
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #12

; Attribute groups. #0 is the kernel, #2/#3 the outlined function and its wrapper,
; #8 the barrier helper, #9 @__kmpc_target_init, #10 @__kmpc_target_deinit,
; #13/#14 are call-site groups.
; NOTE(review): #11 and #15 are not referenced anywhere in the part of the file shown here.
attributes #0 = { alwaysinline convergent norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
attributes #1 = { argmemonly nocallback nofree nosync nounwind willreturn }
attributes #2 = { alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
attributes #3 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
attributes #4 = { nounwind readnone speculatable willreturn }
attributes #5 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
attributes #6 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn }
attributes #7 = { convergent nounwind willreturn }
attributes #8 = { convergent mustprogress noinline nounwind willreturn "frame-pointer"="none" "llvm.assume"="ompx_aligned_barrier,ompx_no_call_asm,ompx_no_call_asm" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
attributes #9 = { convergent nounwind "frame-pointer"="none" "llvm.assume"="ompx_no_call_asm,ompx_no_call_asm" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
attributes #10 = { nounwind "frame-pointer"="none" "llvm.assume"="ompx_no_call_asm,ompx_no_call_asm" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
attributes #11 = { convergent nounwind "frame-pointer"="none" "llvm.assume"="ompx_no_call_asm" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
attributes #12 = { argmemonly nofree nounwind willreturn }
attributes #13 = { nounwind }
attributes #14 = { convergent nounwind "llvm.assume"="ompx_aligned_barrier,ompx_no_call_asm" }
attributes #15 = { convergent nounwind }

; Module-level named metadata: offload entry info, module flags and producer ident.
!omp_offload.info = !{!0}
!llvm.module.flags = !{!2, !3, !4, !5}
!llvm.ident = !{!6}

!0 = !{i32 0, i32 32, i32 18757968, !"main", i32 12, i32 0}
!2 = !{i32 1, !"wchar_size", i32 4}
!3 = !{i32 7, !"openmp", i32 50}
!4 = !{i32 7, !"openmp-device", i32 50}
!5 = !{i32 7, !"PIC Level", i32 2}
!6 = !{!"clang version 15.0.0"}
; TBAA nodes: !7-!10 form one type tree ("Simple C/C++ TBAA"), !14-!30 another ("Simple C++ TBAA").
!7 = !{!8, !8, i64 0}
!8 = !{!"any pointer", !9, i64 0}
!9 = !{!"omnipotent char", !10, i64 0}
!10 = !{!"Simple C/C++ TBAA"}
; !11 and !13 are !range operands; !12 is the empty node used for !invariant.load.
!11 = !{i32 0, i32 1024}
!12 = !{}
!13 = !{i16 1, i16 1025}
!14 = !{!15, !15, i64 0}
!15 = !{!"int", !16, i64 0}
!16 = !{!"omnipotent char", !17, i64 0}
!17 = !{!"Simple C++ TBAA"}
!18 = !{!16, !16, i64 0}
!19 = !{!20, !15, i64 0}
!20 = !{!"_ZTSN12_GLOBAL__N_111TeamStateTyE", !21, i64 0, !15, i64 24, !22, i64 32}
!21 = !{!"_ZTSN12_GLOBAL__N_110ICVStateTyE", !15, i64 0, !15, i64 4, !15, i64 8, !15, i64 12, !15, i64 16, !15, i64 20}
!22 = !{!"any pointer", !16, i64 0}
!23 = !{!20, !15, i64 4}
!24 = !{!20, !15, i64 8}
!25 = !{!20, !15, i64 12}
!26 = !{!20, !15, i64 16}
!27 = !{!20, !15, i64 20}
!28 = !{!20, !15, i64 24}
!29 = !{!20, !22, i64 32}
!30 = !{!22, !22, i64 0}
; NOTE(review): !31 is not attached to any instruction in the part of the file shown here.
!31 = !{!"branch_weights", i32 2000, i32 1}