Lines Matching defs:CGF

47   void Enter(CodeGenFunction &CGF) override {
48 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
50 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
51 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
52 ContBlock = CGF.createBasicBlock("omp_if.end");
54 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
55 CGF.EmitBlock(ThenBlock);
58 void Done(CodeGenFunction &CGF) {
60 CGF.EmitBranch(ContBlock);
61 CGF.EmitBlock(ContBlock, true);
63 void Exit(CodeGenFunction &CGF) override {
64 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
185 CodeGenFunction &CGF;
206 if (auto *CSI = CGF.CapturedStmtInfo) {
232 if ((!CGF.CapturedStmtInfo ||
233 (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
311 unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
317 CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
322 CheckVarsEscapingDeclContext(CodeGenFunction &CGF,
324 : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
726 void Enter(CodeGenFunction &CGF) override {
727 auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
728 RT.emitKernelInit(D, CGF, EST, /* IsSPMD */ false);
730 RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
732 void Exit(CodeGenFunction &CGF) override {
733 auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
734 RT.clearLocThreadIdInsertPt(CGF);
735 RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false);
746 CodeGenFunction &CGF,
750 computeMinAndMaxThreadsAndTeams(D, CGF, MinThreadsVal, MaxThreadsVal,
753 CGBuilderTy &Bld = CGF.Builder;
757 emitGenericVarsProlog(CGF, EST.Loc);
760 void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
764 emitGenericVarsEpilog(CGF);
790 CGBuilderTy &Bld = CGF.Builder;
821 void Enter(CodeGenFunction &CGF) override {
826 RT.emitKernelInit(D, CGF, EST, /* IsSPMD */ true);
828 RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
830 void Exit(CodeGenFunction &CGF) override {
835 RT.clearLocThreadIdInsertPt(CGF);
836 RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true);
896 void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,
902 void CGOpenMPRuntimeGPU::emitNumThreadsClause(CodeGenFunction &CGF,
908 void CGOpenMPRuntimeGPU::emitNumTeamsClause(CodeGenFunction &CGF,
914 CodeGenFunction &CGF, const OMPExecutableDirective &D,
922 CGF, D, ThreadIDVar, InnermostKind, CodeGen));
972 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1012 void Enter(CodeGenFunction &CGF) override {
1014 static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
1016 auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
1026 Rt.emitGenericVarsProlog(CGF, Loc);
1028 void Exit(CodeGenFunction &CGF) override {
1029 static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime())
1030 .emitGenericVarsEpilog(CGF);
1035 CGF, D, ThreadIDVar, InnermostKind, CodeGen);
1040 void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
1045 CGBuilderTy &Bld = CGF.Builder;
1047 const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
1060 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
1061 ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
1065 llvm::Value *AllocArgs[] = {CGF.getTypeSize(VD->getType())};
1067 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1076 llvm::PointerType *VarPtrTy = CGF.ConvertTypeForMem(VarTy)->getPointerTo();
1080 CGF.MakeNaturalAlignPointeeRawAddrLValue(CastedVoidPtr, VarTy);
1086 CGF.EmitStoreOfScalar(ParValue, VarAddr);
1087 I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());
1089 if (auto *DI = CGF.getDebugInfo())
1096 getKmpcAllocShared(CGF, VD);
1098 LValue Base = CGF.MakeAddrLValue(AddrSizePair.first, VD->getType(),
1101 I->getSecond().MappedParams->setVarAddr(CGF, VD, Base.getAddress());
1103 I->getSecond().MappedParams->apply(CGF);
1106 bool CGOpenMPRuntimeGPU::isDelayedVariableLengthDecl(CodeGenFunction &CGF,
1108 const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
1117 CGOpenMPRuntimeGPU::getKmpcAllocShared(CodeGenFunction &CGF,
1119 CGBuilderTy &Bld = CGF.Builder;
1122 llvm::Value *Size = CGF.getTypeSize(VD->getType());
1125 Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1));
1127 llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity());
1134 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1144 CodeGenFunction &CGF,
1147 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1152 void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF) {
1156 const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
1162 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1169 I->getSecond().MappedParams->restore(CGF);
1172 CGF.getTypeSize(VD->getType())};
1173 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1180 void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF,
1185 if (!CGF.HaveInsertPoint())
1190 RawAddress ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1192 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
1199 OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).emitRawPointer(CGF));
1202 emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1205 void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF,
1211 if (!CGF.HaveInsertPoint())
1215 NumThreads](CodeGenFunction &CGF,
1217 CGBuilderTy &Bld = CGF.Builder;
1228 CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF);
1230 Address CapturedVarsAddrs = CGF.CreateDefaultAlignTempAlloca(
1236 ASTContext &Ctx = CGF.getContext();
1242 PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy);
1244 PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(V, CGF.VoidPtrTy);
1245 CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
1253 IfCondVal = Bld.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.Int32Ty,
1256 IfCondVal = llvm::ConstantInt::get(CGF.Int32Ty, 1);
1259 NumThreadsVal = llvm::ConstantInt::get(CGF.Int32Ty, -1);
1261 NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty),
1264 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1267 getThreadID(CGF, Loc),
1270 llvm::ConstantInt::get(CGF.Int32Ty, -1),
1273 Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
1274 CGF.VoidPtrPtrTy),
1276 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1282 RCG(CGF);
1285 void CGOpenMPRuntimeGPU::syncCTAThreads(CodeGenFunction &CGF) {
1287 if (!CGF.HaveInsertPoint())
1294 llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
1295 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1300 void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF,
1305 if (!CGF.HaveInsertPoint())
1309 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1310 getThreadID(CGF, Loc)};
1312 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1318 CodeGenFunction &CGF, StringRef CriticalName,
1321 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.critical.loop");
1322 llvm::BasicBlock *TestBB = CGF.createBasicBlock("omp.critical.test");
1323 llvm::BasicBlock *SyncBB = CGF.createBasicBlock("omp.critical.sync");
1324 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");
1325 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");
1327 auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
1330 llvm::Value *Mask = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1333 llvm::Value *ThreadID = RT.getGPUThreadID(CGF);
1336 llvm::Value *TeamWidth = RT.getGPUNumThreads(CGF);
1340 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/0);
1341 Address Counter = CGF.CreateMemTemp(Int32Ty, "critical_counter");
1342 LValue CounterLVal = CGF.MakeAddrLValue(Counter, Int32Ty);
1343 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), CounterLVal,
1347 CGF.EmitBlock(LoopBB);
1348 llvm::Value *CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);
1349 llvm::Value *CmpLoopBound = CGF.Builder.CreateICmpSLT(CounterVal, TeamWidth);
1350 CGF.Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
1354 CGF.EmitBlock(TestBB);
1355 CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);
1357 CGF.Builder.CreateICmpEQ(ThreadID, CounterVal);
1358 CGF.Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
1361 CGF.EmitBlock(BodyBB);
1364 CGOpenMPRuntime::emitCriticalRegion(CGF, CriticalName, CriticalOpGen, Loc,
1371 CGF.EmitBlock(SyncBB);
1373 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1378 CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));
1379 CGF.EmitStoreOfScalar(IncCounterVal, CounterLVal);
1380 CGF.EmitBranch(LoopBB);
1383 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
1387 static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val,
1390 assert(!CGF.getContext().getTypeSizeInChars(CastTy).isZero() &&
1392 assert(!CGF.getContext().getTypeSizeInChars(ValTy).isZero() &&
1394 llvm::Type *LLVMCastTy = CGF.ConvertTypeForMem(CastTy);
1397 if (CGF.getContext().getTypeSizeInChars(ValTy) ==
1398 CGF.getContext().getTypeSizeInChars(CastTy))
1399 return CGF.Builder.CreateBitCast(Val, LLVMCastTy);
1401 return CGF.Builder.CreateIntCast(Val, LLVMCastTy,
1403 Address CastItem = CGF.CreateMemTemp(CastTy);
1405 CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy,
1408 return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc,
1656 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
1659 if (!CGF.HaveInsertPoint())
1672 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
1691 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1694 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
1695 CGF.AllocaInsertPt->getIterator());
1696 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
1697 CGF.Builder.GetInsertPoint());
1699 CodeGenIP, CGF.SourceLocToDebugLoc(Loc));
1702 CodeGenFunction::OMPPrivateScope Scope(CGF);
1709 ElementType = CGF.ConvertTypeForMem(Private->getType());
1712 PrivateVariable = CGF.GetAddrOfLocalVar(RHSVar).emitRawPointer(CGF);
1715 Variable = CGF.GetAddrOfLocalVar(LHSVar).emitRawPointer(CGF);
1717 switch (CGF.getEvaluationKind(Private->getType())) {
1731 CGF.Builder.restoreIP(CodeGenIP);
1732 auto *CurFn = CGF.CurFn;
1733 CGF.CurFn = NewFunc;
1735 *LHSPtr = CGF.GetAddrOfLocalVar(
1737 .emitRawPointer(CGF);
1738 *RHSPtr = CGF.GetAddrOfLocalVar(
1740 .emitRawPointer(CGF);
1742 emitSingleReductionCombiner(CGF, ReductionOps[I], Privates[I],
1746 CGF.CurFn = CurFn;
1748 return InsertPointTy(CGF.Builder.GetInsertBlock(),
1749 CGF.Builder.GetInsertPoint());
1757 CGF.Builder.restoreIP(OMPBuilder.createReductionsGPU(
1760 CGF.getTarget().getGridValue(), C.getLangOpts().OpenMPCUDAReductionBufNum,
1798 CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF,
1804 Address LocalAddr = CGF.GetAddrOfLocalVar(TargetParam);
1810 CGF.getTypes().getTargetAddressSpace(NativePointeeTy);
1812 llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(LocalAddr, /*Volatile=*/false,
1815 TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1817 llvm::PointerType::get(CGF.getLLVMContext(), NativePointeeAddrSpace));
1818 Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType);
1819 CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false,
1825 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
1842 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NativeArg, TargetType));
1844 CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs);
1890 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
1891 CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs,
1897 Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1899 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
1903 Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).emitRawPointer(CGF));
1904 Args.emplace_back(ZeroAddr.emitRawPointer(CGF));
1906 CGBuilderTy &Bld = CGF.Builder;
1912 CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args");
1915 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1924 SharedArgListAddress = CGF.EmitLoadOfPointer(
1925 GlobalArgs, CGF.getContext()
1926 .getPointerType(CGF.getContext().VoidPtrTy)
1933 Src, CGF.SizeTy->getPointerTo(), CGF.SizeTy);
1934 llvm::Value *LB = CGF.EmitLoadOfScalar(
1937 CGF.getContext().getPointerType(CGF.getContext().getSizeType()),
1943 Src, CGF.SizeTy->getPointerTo(), CGF.SizeTy);
1944 llvm::Value *UB = CGF.EmitLoadOfScalar(
1947 CGF.getContext().getPointerType(CGF.getContext().getSizeType()),
1953 ASTContext &CGFContext = CGF.getContext();
1958 Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy)),
1959 CGF.ConvertTypeForMem(ElemTy));
1960 llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress,
1966 Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(),
1973 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args);
1974 CGF.FinishFunction();
1978 void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
1984 assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&
1996 NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP;
2003 CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
2016 auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
2032 emitGenericVarsProlog(CGF, D->getBeginLoc());
2036 void Emit(CodeGenFunction &CGF, Flags flags) override {
2037 static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime())
2038 .emitGenericVarsEpilog(CGF);
2041 CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup);
2045 Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,
2073 llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
2083 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2093 auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
2114 void CGOpenMPRuntimeGPU::functionFinished(CodeGenFunction &CGF) {
2115 FunctionGlobalizedDecls.erase(CGF.CurFn);
2116 CGOpenMPRuntime::functionFinished(CGF);
2120 CodeGenFunction &CGF, const OMPLoopDirective &S,
2123 auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
2126 Chunk = CGF.EmitScalarConversion(
2127 RT.getGPUNumThreads(CGF),
2128 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2133 CGF, S, ScheduleKind, Chunk);
2137 CodeGenFunction &CGF, const OMPLoopDirective &S,
2143 ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,
2144 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2149 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
2165 Address VDAddr = CGF.GetAddrOfLocalVar(VD);
2168 VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType());
2170 VDLVal = CGF.MakeAddrLValue(
2175 if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {
2177 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
2178 llvm::Value *CXXThis = CGF.LoadCXXThis();
2179 CGF.EmitStoreOfScalar(CXXThis, ThisLVal);
2191 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
2192 Address VDAddr = CGF.GetAddrOfLocalVar(cast<VarDecl>(VD));
2194 VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,
2197 CGF.EmitStoreOfScalar(VDAddr.emitRawPointer(CGF), VarLVal);
2343 llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) {
2344 CGBuilderTy &Bld = CGF.Builder;
2345 llvm::Module *M = &CGF.CGM.getModule();
2350 llvm::FunctionType::get(CGF.Int32Ty, std::nullopt, false),
2351 llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
2356 llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) {
2358 return CGF.EmitRuntimeCall(