Lines Matching defs:CGF

47   void Enter(CodeGenFunction &CGF) override {
48 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
50 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
51 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
52 ContBlock = CGF.createBasicBlock("omp_if.end");
54 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
55 CGF.EmitBlock(ThenBlock);
58 void Done(CodeGenFunction &CGF) {
60 CGF.EmitBranch(ContBlock);
61 CGF.EmitBlock(ContBlock, true);
63 void Exit(CodeGenFunction &CGF) override {
64 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
185 CodeGenFunction &CGF;
206 if (auto *CSI = CGF.CapturedStmtInfo) {
232 if ((!CGF.CapturedStmtInfo ||
233 (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
311 unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
317 CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
322 CheckVarsEscapingDeclContext(CodeGenFunction &CGF,
324 : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
726 void Enter(CodeGenFunction &CGF) override {
727 auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
728 RT.emitKernelInit(D, CGF, EST, /* IsSPMD */ false);
730 RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
732 void Exit(CodeGenFunction &CGF) override {
733 auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
734 RT.clearLocThreadIdInsertPt(CGF);
735 RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false);
746 CodeGenFunction &CGF,
752 computeMinAndMaxThreadsAndTeams(D, CGF, Attrs);
754 CGBuilderTy &Bld = CGF.Builder;
757 emitGenericVarsProlog(CGF, EST.Loc);
760 void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
764 emitGenericVarsEpilog(CGF);
790 CGBuilderTy &Bld = CGF.Builder;
821 void Enter(CodeGenFunction &CGF) override {
826 RT.emitKernelInit(D, CGF, EST, /* IsSPMD */ true);
828 RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
830 void Exit(CodeGenFunction &CGF) override {
835 RT.clearLocThreadIdInsertPt(CGF);
836 RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true);
896 void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,
902 void CGOpenMPRuntimeGPU::emitNumThreadsClause(CodeGenFunction &CGF,
908 void CGOpenMPRuntimeGPU::emitNumTeamsClause(CodeGenFunction &CGF,
914 CodeGenFunction &CGF, const OMPExecutableDirective &D,
922 CGF, D, ThreadIDVar, InnermostKind, CodeGen));
972 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1012 void Enter(CodeGenFunction &CGF) override {
1014 static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
1016 auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
1026 Rt.emitGenericVarsProlog(CGF, Loc);
1028 void Exit(CodeGenFunction &CGF) override {
1029 static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime())
1030 .emitGenericVarsEpilog(CGF);
1035 CGF, D, ThreadIDVar, InnermostKind, CodeGen);
1040 void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
1045 CGBuilderTy &Bld = CGF.Builder;
1047 const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
1060 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
1061 ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
1065 llvm::Value *AllocArgs[] = {CGF.getTypeSize(VD->getType())};
1067 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1079 CGF.MakeNaturalAlignPointeeRawAddrLValue(CastedVoidPtr, VarTy);
1085 CGF.EmitStoreOfScalar(ParValue, VarAddr);
1086 I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());
1088 if (auto *DI = CGF.getDebugInfo())
1095 getKmpcAllocShared(CGF, VD);
1097 LValue Base = CGF.MakeAddrLValue(AddrSizePair.first, VD->getType(),
1100 I->getSecond().MappedParams->setVarAddr(CGF, VD, Base.getAddress());
1102 I->getSecond().MappedParams->apply(CGF);
1105 bool CGOpenMPRuntimeGPU::isDelayedVariableLengthDecl(CodeGenFunction &CGF,
1107 const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
1116 CGOpenMPRuntimeGPU::getKmpcAllocShared(CodeGenFunction &CGF,
1118 CGBuilderTy &Bld = CGF.Builder;
1121 llvm::Value *Size = CGF.getTypeSize(VD->getType());
1124 Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1));
1126 llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity());
1133 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1143 CodeGenFunction &CGF,
1146 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1151 void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF) {
1155 const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
1161 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1168 I->getSecond().MappedParams->restore(CGF);
1171 CGF.getTypeSize(VD->getType())};
1172 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1179 void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF,
1184 if (!CGF.HaveInsertPoint())
1189 RawAddress ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1191 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
1198 OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).emitRawPointer(CGF));
1201 emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1204 void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF,
1210 if (!CGF.HaveInsertPoint())
1214 NumThreads](CodeGenFunction &CGF,
1216 CGBuilderTy &Bld = CGF.Builder;
1227 CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF);
1229 Address CapturedVarsAddrs = CGF.CreateDefaultAlignTempAlloca(
1235 ASTContext &Ctx = CGF.getContext();
1241 PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy);
1243 PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(V, CGF.VoidPtrTy);
1244 CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
1252 IfCondVal = Bld.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.Int32Ty,
1255 IfCondVal = llvm::ConstantInt::get(CGF.Int32Ty, 1);
1258 NumThreadsVal = llvm::ConstantInt::get(CGF.Int32Ty, -1);
1260 NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty),
1263 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1266 getThreadID(CGF, Loc),
1269 llvm::ConstantInt::get(CGF.Int32Ty, -1),
1272 Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
1273 CGF.VoidPtrPtrTy),
1275 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1281 RCG(CGF);
1284 void CGOpenMPRuntimeGPU::syncCTAThreads(CodeGenFunction &CGF) {
1286 if (!CGF.HaveInsertPoint())
1293 llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
1294 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1299 void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF,
1304 if (!CGF.HaveInsertPoint())
1308 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1309 getThreadID(CGF, Loc)};
1311 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1317 CodeGenFunction &CGF, StringRef CriticalName,
1320 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.critical.loop");
1321 llvm::BasicBlock *TestBB = CGF.createBasicBlock("omp.critical.test");
1322 llvm::BasicBlock *SyncBB = CGF.createBasicBlock("omp.critical.sync");
1323 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");
1324 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");
1326 auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
1329 llvm::Value *Mask = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1332 llvm::Value *ThreadID = RT.getGPUThreadID(CGF);
1335 llvm::Value *TeamWidth = RT.getGPUNumThreads(CGF);
1339 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/0);
1340 Address Counter = CGF.CreateMemTemp(Int32Ty, "critical_counter");
1341 LValue CounterLVal = CGF.MakeAddrLValue(Counter, Int32Ty);
1342 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), CounterLVal,
1346 CGF.EmitBlock(LoopBB);
1347 llvm::Value *CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);
1348 llvm::Value *CmpLoopBound = CGF.Builder.CreateICmpSLT(CounterVal, TeamWidth);
1349 CGF.Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
1353 CGF.EmitBlock(TestBB);
1354 CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);
1356 CGF.Builder.CreateICmpEQ(ThreadID, CounterVal);
1357 CGF.Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
1360 CGF.EmitBlock(BodyBB);
1363 CGOpenMPRuntime::emitCriticalRegion(CGF, CriticalName, CriticalOpGen, Loc,
1370 CGF.EmitBlock(SyncBB);
1372 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1377 CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));
1378 CGF.EmitStoreOfScalar(IncCounterVal, CounterLVal);
1379 CGF.EmitBranch(LoopBB);
1382 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
1386 static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val,
1389 assert(!CGF.getContext().getTypeSizeInChars(CastTy).isZero() &&
1391 assert(!CGF.getContext().getTypeSizeInChars(ValTy).isZero() &&
1393 llvm::Type *LLVMCastTy = CGF.ConvertTypeForMem(CastTy);
1396 if (CGF.getContext().getTypeSizeInChars(ValTy) ==
1397 CGF.getContext().getTypeSizeInChars(CastTy))
1398 return CGF.Builder.CreateBitCast(Val, LLVMCastTy);
1400 return CGF.Builder.CreateIntCast(Val, LLVMCastTy,
1402 Address CastItem = CGF.CreateMemTemp(CastTy);
1404 CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy,
1407 return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc,
1655 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
1658 if (!CGF.HaveInsertPoint())
1671 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
1690 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1693 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
1694 CGF.AllocaInsertPt->getIterator());
1695 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
1696 CGF.Builder.GetInsertPoint());
1698 CodeGenIP, CGF.SourceLocToDebugLoc(Loc));
1701 CodeGenFunction::OMPPrivateScope Scope(CGF);
1708 ElementType = CGF.ConvertTypeForMem(Private->getType());
1711 PrivateVariable = CGF.GetAddrOfLocalVar(RHSVar).emitRawPointer(CGF);
1714 Variable = CGF.GetAddrOfLocalVar(LHSVar).emitRawPointer(CGF);
1716 switch (CGF.getEvaluationKind(Private->getType())) {
1730 CGF.Builder.restoreIP(CodeGenIP);
1731 auto *CurFn = CGF.CurFn;
1732 CGF.CurFn = NewFunc;
1734 *LHSPtr = CGF.GetAddrOfLocalVar(
1736 .emitRawPointer(CGF);
1737 *RHSPtr = CGF.GetAddrOfLocalVar(
1739 .emitRawPointer(CGF);
1741 emitSingleReductionCombiner(CGF, ReductionOps[I], Privates[I],
1745 CGF.CurFn = CurFn;
1747 return InsertPointTy(CGF.Builder.GetInsertBlock(),
1748 CGF.Builder.GetInsertPoint());
1760 CGF.getTarget().getGridValue(),
1762 CGF.Builder.restoreIP(AfterIP);
1799 CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF,
1805 Address LocalAddr = CGF.GetAddrOfLocalVar(TargetParam);
1811 CGF.getTypes().getTargetAddressSpace(NativePointeeTy);
1813 llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(LocalAddr, /*Volatile=*/false,
1816 TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1818 llvm::PointerType::get(CGF.getLLVMContext(), NativePointeeAddrSpace));
1819 Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType);
1820 CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false,
1826 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
1843 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NativeArg, TargetType));
1845 CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs);
1891 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
1892 CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs,
1898 Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1900 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
1904 Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).emitRawPointer(CGF));
1905 Args.emplace_back(ZeroAddr.emitRawPointer(CGF));
1907 CGBuilderTy &Bld = CGF.Builder;
1913 CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args");
1916 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1925 SharedArgListAddress = CGF.EmitLoadOfPointer(
1926 GlobalArgs, CGF.getContext()
1927 .getPointerType(CGF.getContext().VoidPtrTy)
1934 Src, Bld.getPtrTy(0), CGF.SizeTy);
1935 llvm::Value *LB = CGF.EmitLoadOfScalar(
1938 CGF.getContext().getPointerType(CGF.getContext().getSizeType()),
1944 CGF.SizeTy);
1945 llvm::Value *UB = CGF.EmitLoadOfScalar(
1948 CGF.getContext().getPointerType(CGF.getContext().getSizeType()),
1954 ASTContext &CGFContext = CGF.getContext();
1959 Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy)),
1960 CGF.ConvertTypeForMem(ElemTy));
1961 llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress,
1967 Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(),
1974 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args);
1975 CGF.FinishFunction();
1979 void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
1985 assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&
1997 NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP;
2004 CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
2017 auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
2033 emitGenericVarsProlog(CGF, D->getBeginLoc());
2037 void Emit(CodeGenFunction &CGF, Flags flags) override {
2038 static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime())
2039 .emitGenericVarsEpilog(CGF);
2042 CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup);
2046 Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,
2072 llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
2082 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2083 GV, CGF.Builder.getPtrTy(CGM.getContext().getTargetAddressSpace(
2092 auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
2113 void CGOpenMPRuntimeGPU::functionFinished(CodeGenFunction &CGF) {
2114 FunctionGlobalizedDecls.erase(CGF.CurFn);
2115 CGOpenMPRuntime::functionFinished(CGF);
2119 CodeGenFunction &CGF, const OMPLoopDirective &S,
2122 auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
2125 Chunk = CGF.EmitScalarConversion(
2126 RT.getGPUNumThreads(CGF),
2127 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2132 CGF, S, ScheduleKind, Chunk);
2136 CodeGenFunction &CGF, const OMPLoopDirective &S,
2142 ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,
2143 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2148 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
2164 Address VDAddr = CGF.GetAddrOfLocalVar(VD);
2167 VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType());
2169 VDLVal = CGF.MakeAddrLValue(
2174 if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {
2176 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
2177 llvm::Value *CXXThis = CGF.LoadCXXThis();
2178 CGF.EmitStoreOfScalar(CXXThis, ThisLVal);
2190 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
2191 Address VDAddr = CGF.GetAddrOfLocalVar(cast<VarDecl>(VD));
2193 VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,
2196 CGF.EmitStoreOfScalar(VDAddr.emitRawPointer(CGF), VarLVal);
2347 llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) {
2348 CGBuilderTy &Bld = CGF.Builder;
2349 llvm::Module *M = &CGF.CGM.getModule();
2353 F = llvm::Function::Create(llvm::FunctionType::get(CGF.Int32Ty, {}, false),
2355 &CGF.CGM.getModule());
2360 llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) {
2362 return CGF.EmitRuntimeCall(