Lines Matching +full:os +full:- +full:manifest +full:- +full:offset

1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
13 //===----------------------------------------------------------------------===//
62 #define DEBUG_TYPE "openmp-ir-builder"
68 OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
70 "'as-if' properties of runtime calls."),
74 "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
81 /// at position IP1 may change the meaning of IP2 or vice-versa. This is because
154 Kernel->getFnAttribute("target-features").getValueAsString();
229 // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
273 if (Instruction *Term = Source->getTerminator()) {
275 assert(!Br->isConditional() &&
277 BasicBlock *Succ = Br->getSuccessor(0);
278 Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
279 Br->setSuccessor(0, Target);
284 NewBr->setDebugLoc(DL);
289 assert(New->getFirstInsertionPt() == New->begin() &&
294 New->splice(New->begin(), Old, IP.getPoint(), Old->end());
306 Builder.SetInsertPoint(Old->getTerminator());
319 Old->getContext(), Name.isTriviallyEmpty() ? Old->getName() : Name,
320 Old->getParent(), Old->getNextNode());
322 New->replaceSuccessorsPhiUsesWith(Old, New);
331 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
345 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
357 return splitBB(Builder, CreateBranch, Old->getName() + Suffix);
396 //===----------------------------------------------------------------------===//
398 //===----------------------------------------------------------------------===//
490 //===----------------------------------------------------------------------===//
492 //===----------------------------------------------------------------------===//
536 bool Param = true) -> void {
601 if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
602 LLVMContext &Ctx = Fn->getContext();
605 // - The callback callee is argument number 2 (microtask).
606 // - The first two arguments of the callback callee are unknown (-1).
607 // - All variadic arguments to the runtime function are passed to the
609 Fn->addMetadata(
612 2, {-1, -1}, /* VarArgsArePassed */ true)}));
616 LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
617 << " with type " << *Fn->getFunctionType() << "\n");
621 LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
622 << " with type " << *Fn->getFunctionType() << "\n");
641 BasicBlock &EntryBlock = Function->getEntryBlock();
646 for (auto Block = std::next(Function->begin(), 1); Block != Function->end();
648 for (auto Inst = Block->getReverseIterator()->begin();
649 Inst != Block->getReverseIterator()->end();) {
652 if (!isa<ConstantData>(AllocaInst->getArraySize()))
654 AllocaInst->moveBeforePreserving(MoveLocInst);
698 LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
699 << " Exit: " << OI.ExitBB->getName() << "\n");
708 // Forward target-cpu, target-features attributes to the outlined function.
709 auto TargetCpuAttr = OuterFn->getFnAttribute("target-cpu");
711 OutlinedFn->addFnAttr(TargetCpuAttr);
713 auto TargetFeaturesAttr = OuterFn->getFnAttribute("target-features");
715 OutlinedFn->addFnAttr(TargetFeaturesAttr);
719 assert(OutlinedFn->getReturnType()->isVoidTy() &&
724 OutlinedFn->removeFromParent();
725 M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
730 BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
732 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
733 // Move instructions from the to-be-deleted ArtificialEntry to the entry
749 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
752 OI.EntryBB->moveBefore(&ArtificialEntry);
755 assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
756 assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
780 // defined/read+writeable allocation sizes would be non-trivial
790 const TargetRegionEntryInfo &EntryInfo) -> void {
816 GV->setVisibility(GlobalValue::HiddenVisibility);
825 // Enable "C-mode".
852 GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
853 GV->setAlignment(Align(8));
913 if (DIFile *DIF = DIL->getFile())
914 if (std::optional<StringRef> Source = DIF->getSource())
916 StringRef Function = DIL->getScope()->getSubprogram()->getName();
918 Function = F->getName();
919 return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
920 DIL->getColumn(), SrcLocStrSize);
926 Loc.IP.getBlock()->getParent());
1033 Builder.SetInsertPoint(UI->getParent());
1034 UI->eraseFromParent();
1056 M.getDataLayout().getPrefTypeAlign(KernelArgs[I]->getType()));
1122 auto CurFn = Builder.GetInsertBlock()->getParent();
1139 if (Builder.GetInsertPoint() == BB->end()) {
1143 BB->getContext(), BB->getName() + ".cont", BB->getParent());
1146 BB->getTerminator()->eraseFromParent();
1150 BB->getContext(), BB->getName() + ".cncl", BB->getParent());
1166 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
1179 IRBuilder<> &Builder = OMPIRBuilder->Builder;
1188 unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
1192 CI->getParent()->setName("omp_parallel");
1195 Type *PtrTy = OMPIRBuilder->VoidPtr;
1200 Builder.SetInsertPoint(OuterAllocaBB, OuterAllocaBB->getFirstInsertionPt());
1206 if (ArgsAlloca->getAddressSpace())
1212 Value *V = *(CI->arg_begin() + 2 + Idx);
1219 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1227 /* number of threads */ NumThreads ? NumThreads : Builder.getInt32(-1),
1228 /* Proc bind */ Builder.getInt32(-1),
1230 Builder.CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr),
1236 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
1241 << *Builder.GetInsertBlock()->getParent() << "\n");
1246 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1250 CI->eraseFromParent();
1253 I->eraseFromParent();
1266 IRBuilder<> &Builder = OMPIRBuilder->Builder;
1270 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1273 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1276 if (!F->hasMetadata(LLVMContext::MD_callback)) {
1277 LLVMContext &Ctx = F->getContext();
1280 // - The callback callee is argument number 2 (microtask).
1281 // - The first two arguments of the callback callee are unknown (-1).
1282 // - All variadic arguments to the __kmpc_fork_call are passed to the
1284 F->addMetadata(LLVMContext::MD_callback,
1286 2, {-1, -1},
1297 unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
1300 CI->getParent()->setName("omp_parallel");
1306 Builder.CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr)};
1311 Value *Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1314 RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
1318 auto PtrTy = OMPIRBuilder->VoidPtr;
1323 if (IfCondition && RealArgs.back()->getType() != PtrTy)
1329 << *Builder.GetInsertBlock()->getParent() << "\n");
1334 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1338 CI->eraseFromParent();
1341 I->eraseFromParent();
1386 Function *OuterFn = InsertBB->getParent();
1398 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->begin());
1409 TIDAddr->insertAfter(TIDAddrAlloca);
1414 ZeroAddr->insertAfter(ZeroAddrAlloca);
1427 BasicBlock *EntryBB = UI->getParent();
1428 BasicBlock *PRegEntryBB = EntryBB->splitBasicBlock(UI, "omp.par.entry");
1429 BasicBlock *PRegBodyBB = PRegEntryBB->splitBasicBlock(UI, "omp.par.region");
1431 PRegBodyBB->splitBasicBlock(UI, "omp.par.pre_finalize");
1432 BasicBlock *PRegExitBB = PRegPreFiniBB->splitBasicBlock(UI, "omp.par.exit");
1435 // Hide "open-ended" blocks from the given FiniCB by setting the right jump
1437 if (IP.getBlock()->end() == IP.getPoint()) {
1441 IP = InsertPointTy(I->getParent(), I->getIterator());
1443 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1444 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1453 Builder.SetInsertPoint(PRegEntryBB->getTerminator());
1469 // PRegionEntryBB <- Privatization allocas are placed here.
1472 // PRegionBodyBB <- BodeGen is invoked here.
1475 // PRegPreFiniBB <- The block we will start finalization from.
1478 // PRegionExitBB <- A common exit to simplify block collection.
1485 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
1520 PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
1521 PRegOutlinedExitBB->setName("omp.par.outlined.exit");
1555 if (ParallelRegionBlockSet.count(UserI->getParent()))
1560 // value onto stack and load it back inside the to-be-outlined region. This
1565 if (!V.getType()->isPointerTy()) {
1574 // block of the to-be-outlined region.
1576 InsertBB->getTerminator()->getIterator());
1579 // Load back next to allocations in the to-be-outlined region.
1586 if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
1593 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1602 UPtr->set(ReplacementValue);
1606 // wrapped into pointers before passing them into the to-be-outlined region.
1609 // OpenMP-related values (thread ID and zero address pointers) remain leading
1612 ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
1617 OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
1628 "OpenMP outlining should not produce live-out values!");
1633 dbgs() << " PBR: " << BB->getName() << "\n";
1637 // finalize function a last time to finalize values between the pre-fini
1644 Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
1646 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
1652 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1653 UI->eraseFromParent();
1711 // - Allocates space on the stack of an array of DependInfo objects
1712 // - Populates each DependInfo object with relevant information of
1714 // - All code is inserted in the entry block of the current function.
1745 OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1812 InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin());
1813 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin());
1835 bool HasShareds = StaleCI->arg_size() > 1;
1843 // Arguments - `loc_ref` (Ident) and `gtid` (ThreadID)
1847 // Argument - `flags`
1860 // Argument - `sizeof_kmp_task_t` (TaskSize)
1867 // Argument - `sizeof_shareds` (SharedsSize)
1873 dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
1878 dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
1894 Value *Shareds = StaleCI->getArgOperand(1);
1895 Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
1905 &OldIP.getBlock()->getParent()->getEntryBlock().back());
1964 Builder.GetInsertPoint()->getParent()->getTerminator();
1990 CI->setDebugLoc(StaleCI->getDebugLoc());
2010 StaleCI->eraseFromParent();
2012 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2015 OutlinedFn.getArg(1)->replaceUsesWithIf(
2020 [](Instruction *I) { I->eraseFromParent(); });
2024 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin());
2068 if (IP.getBlock()->end() != IP.getPoint())
2079 auto *CaseBB = IP.getBlock()->getSinglePredecessor();
2080 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2081 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2083 IP = InsertPointTy(I->getParent(), I->getIterator());
2091 // -> OMP.createSection() which generates the IR for each section
2098 // case <NumSection> - 1:
2099 // <SectionStmt[<NumSection> - 1]>;
2109 Function *CurFn = Continue->getParent();
2116 SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
2120 {CaseEndBr->getParent(), CaseEndBr->getIterator()});
2146 AfterIP = {FiniBB, FiniBB->begin()};
2160 if (IP.getBlock()->end() != IP.getPoint())
2172 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2173 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2175 IP = InsertPointTy(I->getParent(), I->getIterator());
2190 return OpenMPIRBuilder::InsertPointTy(I->getParent(), IT);
2212 GV->setSection("llvm.metadata");
2235 unsigned LaneIDMask = ~0u >> (32u - LaneIDBits);
2242 Type *FromType = From->getType();
2251 if (ToType->isIntegerTy() && FromType->isIntegerTy())
2259 CastItem, FromType->getPointerTo());
2267 Value *Offset) {
2271 // Cast all types to 32- or 64-bit values before calling shuffle routines.
2282 Builder.CreateCall(ShuffleFunc, {ElemCast, Offset, WarpSizeCast});
2288 Value *Offset, Type *ReductionArrayTy) {
2309 Ptr, IntType->getPointerTo(), Ptr->getName() + ".ascast");
2313 ElemPtr, IntType->getPointerTo(), ElemPtr->getName() + ".ascast");
2315 Function *CurFunc = Builder.GetInsertBlock()->getParent();
2326 Builder.CreatePHI(Ptr->getType(), /*NumReservedValues=*/2);
2327 PhiSrc->addIncoming(Ptr, CurrentBB);
2329 Builder.CreatePHI(ElemPtr->getType(), /*NumReservedValues=*/2);
2330 PhiDest->addIncoming(ElemPtr, CurrentBB);
2337 Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2344 IntType, Offset);
2351 PhiSrc->addIncoming(LocalPtr, ThenBB);
2352 PhiDest->addIncoming(LocalElemPtr, ThenBB);
2357 AllocaIP, Builder.CreateLoad(IntType, Ptr), IntType, Offset);
2358 if (ElemType->isIntegerTy() && ElemType->getScalarSizeInBits() <
2359 Res->getType()->getScalarSizeInBits())
2378 // Iterates, element-by-element, through the source Reduce list and
2408 DestAlloca->setAlignment(
2413 DestElementAddr->getName() + ".ascast");
2443 RI.ElementType->getStructElementType(0), SrcRealPtr, ".real");
2447 RI.ElementType->getStructElementType(1), SrcImgPtr, ".imag");
2477 DestElementAddr->getName() + ".ascast");
2494 WcFunc->setAttributes(FuncAttrs);
2495 WcFunc->addParamAttr(0, Attribute::NoUndef);
2496 WcFunc->addParamAttr(1, Attribute::NoUndef);
2503 Argument *ReduceListArg = WcFunc->getArg(0);
2506 Argument *NumWarpsArg = WcFunc->getArg(1);
2537 Builder.GetInsertBlock()->getFirstInsertionPt());
2538 Type *Arg0Type = ReduceListArg->getType();
2539 Type *Arg1Type = NumWarpsArg->getType();
2542 Arg0Type, nullptr, ReduceListArg->getName() + ".addr");
2544 Builder.CreateAlloca(Arg1Type, nullptr, NumWarpsArg->getName() + ".addr");
2546 ReduceListAlloca, Arg0Type, ReduceListAlloca->getName() + ".ascast");
2548 NumWarpsAlloca, Arg1Type->getPointerTo(),
2549 NumWarpsAlloca->getName() + ".ascast");
2554 getInsertPointAfterInstr(&Builder.GetInsertBlock()->back());
2584 CntAddr->getName() + ".ascast");
2592 emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
2598 emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
2613 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
2642 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2646 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
2664 emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
2688 emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
2691 emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
2698 auto *CurFn = Builder.GetInsertBlock()->getParent();
2724 SarFunc->setAttributes(FuncAttrs);
2725 SarFunc->addParamAttr(0, Attribute::NoUndef);
2726 SarFunc->addParamAttr(1, Attribute::NoUndef);
2727 SarFunc->addParamAttr(2, Attribute::NoUndef);
2728 SarFunc->addParamAttr(3, Attribute::NoUndef);
2729 SarFunc->addParamAttr(1, Attribute::SExt);
2730 SarFunc->addParamAttr(2, Attribute::SExt);
2731 SarFunc->addParamAttr(3, Attribute::SExt);
2736 Argument *ReduceListArg = SarFunc->getArg(0);
2738 Argument *LaneIDArg = SarFunc->getArg(1);
2739 // Offset of the remote source lane relative to the current lane.
2740 Argument *RemoteLaneOffsetArg = SarFunc->getArg(2);
2742 Argument *AlgoVerArg = SarFunc->getArg(3);
2744 Type *ReduceListArgType = ReduceListArg->getType();
2745 Type *LaneIDArgType = LaneIDArg->getType();
2746 Type *LaneIDArgPtrType = LaneIDArg->getType()->getPointerTo();
2748 ReduceListArgType, nullptr, ReduceListArg->getName() + ".addr");
2750 LaneIDArg->getName() + ".addr");
2752 LaneIDArgType, nullptr, RemoteLaneOffsetArg->getName() + ".addr");
2754 AlgoVerArg->getName() + ".addr");
2758 // Create a local thread-private variable to host the Reduce list
2765 ReduceListAlloca->getName() + ".ascast");
2767 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->getName() + ".ascast");
2770 RemoteLaneOffsetAlloca->getName() + ".ascast");
2772 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->getName() + ".ascast");
2775 RemoteReductionListAlloca->getName() + ".ascast");
2800 // if (AlgoVer==0) || (AlgoVer==1 && (LaneId < Offset)) || (AlgoVer==2 &&
2801 // LaneId % 2 == 0 && Offset > 0):
2837 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
2843 ->addFnAttr(Attribute::NoUnwind);
2846 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2849 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
2851 // if (AlgoVer==1 && (LaneId >= Offset)) copy Remote Reduce list to local
2862 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
2867 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
2870 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
2889 LtGCFunc->setAttributes(FuncAttrs);
2890 LtGCFunc->addParamAttr(0, Attribute::NoUndef);
2891 LtGCFunc->addParamAttr(1, Attribute::NoUndef);
2892 LtGCFunc->addParamAttr(2, Attribute::NoUndef);
2898 Argument *BufferArg = LtGCFunc->getArg(0);
2900 Argument *IdxArg = LtGCFunc->getArg(1);
2902 Argument *ReduceListArg = LtGCFunc->getArg(2);
2905 BufferArg->getName() + ".addr");
2907 IdxArg->getName() + ".addr");
2909 Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
2912 BufferArgAlloca->getName() + ".ascast");
2914 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
2917 ReduceListArgAlloca->getName() + ".ascast");
2957 RI.ElementType->getStructElementType(0), SrcRealPtr, ".real");
2961 RI.ElementType->getStructElementType(1), SrcImgPtr, ".imag");
2999 LtGRFunc->setAttributes(FuncAttrs);
3000 LtGRFunc->addParamAttr(0, Attribute::NoUndef);
3001 LtGRFunc->addParamAttr(1, Attribute::NoUndef);
3002 LtGRFunc->addParamAttr(2, Attribute::NoUndef);
3008 Argument *BufferArg = LtGRFunc->getArg(0);
3010 Argument *IdxArg = LtGRFunc->getArg(1);
3012 Argument *ReduceListArg = LtGRFunc->getArg(2);
3015 BufferArg->getName() + ".addr");
3017 IdxArg->getName() + ".addr");
3019 Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
3024 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
3030 BufferArgAlloca->getName() + ".ascast");
3032 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
3035 ReduceListArgAlloca->getName() + ".ascast");
3038 LocalReduceList->getName() + ".ascast");
3064 ->addFnAttr(Attribute::NoUnwind);
3082 LtGCFunc->setAttributes(FuncAttrs);
3083 LtGCFunc->addParamAttr(0, Attribute::NoUndef);
3084 LtGCFunc->addParamAttr(1, Attribute::NoUndef);
3085 LtGCFunc->addParamAttr(2, Attribute::NoUndef);
3091 Argument *BufferArg = LtGCFunc->getArg(0);
3093 Argument *IdxArg = LtGCFunc->getArg(1);
3095 Argument *ReduceListArg = LtGCFunc->getArg(2);
3098 BufferArg->getName() + ".addr");
3100 IdxArg->getName() + ".addr");
3102 Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
3105 BufferArgAlloca->getName() + ".ascast");
3107 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
3110 ReduceListArgAlloca->getName() + ".ascast");
3147 RI.ElementType->getStructElementType(0), SrcRealPtr, ".real");
3151 RI.ElementType->getStructElementType(1), SrcImgPtr, ".imag");
3190 LtGRFunc->setAttributes(FuncAttrs);
3191 LtGRFunc->addParamAttr(0, Attribute::NoUndef);
3192 LtGRFunc->addParamAttr(1, Attribute::NoUndef);
3193 LtGRFunc->addParamAttr(2, Attribute::NoUndef);
3199 Argument *BufferArg = LtGRFunc->getArg(0);
3201 Argument *IdxArg = LtGRFunc->getArg(1);
3203 Argument *ReduceListArg = LtGRFunc->getArg(2);
3206 BufferArg->getName() + ".addr");
3208 IdxArg->getName() + ".addr");
3210 Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
3215 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
3221 BufferArgAlloca->getName() + ".ascast");
3223 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
3226 ReduceListArgAlloca->getName() + ".ascast");
3229 LocalReduceList->getName() + ".ascast");
3255 ->addFnAttr(Attribute::NoUnwind);
3276 ReductionFunc->setAttributes(FuncAttrs);
3277 ReductionFunc->addParamAttr(0, Attribute::NoUndef);
3278 ReductionFunc->addParamAttr(1, Attribute::NoUndef);
3287 Argument *Arg0 = ReductionFunc->getArg(0);
3288 Argument *Arg1 = ReductionFunc->getArg(1);
3289 Type *Arg0Type = Arg0->getType();
3290 Type *Arg1Type = Arg1->getType();
3293 Builder.CreateAlloca(Arg0Type, nullptr, Arg0->getName() + ".addr");
3295 Builder.CreateAlloca(Arg1Type, nullptr, Arg1->getName() + ".addr");
3297 LHSAlloca, Arg0Type, LHSAlloca->getName() + ".ascast");
3299 RHSAlloca, Arg1Type, RHSAlloca->getName() + ".ascast");
3316 RHSI8Ptr, RI.PrivateVariable->getType(),
3317 RHSI8Ptr->getName() + ".ascast");
3324 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->getName() + ".ascast");
3350 LHSFixupPtr->replaceUsesWithIf(
3352 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3355 RHSFixupPtr->replaceUsesWithIf(
3357 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3371 assert(RI.Variable && "expected non-null variable");
3372 assert(RI.PrivateVariable && "expected non-null private variable");
3374 "expected non-null reduction generator callback");
3377 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3381 assert(RI.Variable->getType()->isPointerTy() &&
3408 Function *CurFunc = Builder.GetInsertBlock()->getParent();
3411 for (auto Attr : CurFunc->getAttributes().getFnAttrs())
3419 createReductionFunction(Builder.GetInsertBlock()->getParent()->getName(),
3441 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
3450 ReductionListAlloca, PtrTy, ReductionListAlloca->getName() + ".ascast");
3552 LHSPtr->replaceUsesWithIf(LHS, [ReductionFunc](const Use &U) {
3553 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3556 RHSPtr->replaceUsesWithIf(RHS, [ReductionFunc](const Use &U) {
3557 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3588 assert(RI.Variable && "expected non-null variable");
3589 assert(RI.PrivateVariable && "expected non-null private variable");
3590 assert(RI.ReductionGen && "expected non-null reduction generator callback");
3591 assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
3594 assert(RI.Variable->getType()->isPointerTy() &&
3603 InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
3604 InsertBlock->getTerminator()->eraseFromParent();
3606 // Create and populate array of type-erased pointers to private reduction
3610 Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
3613 Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
3625 Function *Func = Builder.GetInsertBlock()->getParent();
3626 Module *Module = Func->getParent();
3638 const DataLayout &DL = Module->getDataLayout();
3652 // Create final reduction entry blocks for the atomic and non-atomic case.
3656 BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func);
3658 BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func);
3661 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
3662 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
3664 // Populate the non-atomic reduction using the elementwise reduction function.
3671 // We have one less load for by-ref case because that load is now inside of
3691 // for by-ref case, the load is inside of the reduction region
3718 // function. Partial values are extracted from the type-erased array of
3721 BasicBlock::Create(Module->getContext(), "", ReductionFunc);
3723 Value *LHSArrayPtr = ReductionFunc->getArg(0);
3724 Value *RHSArrayPtr = ReductionFunc->getArg(1);
3731 Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
3737 Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
3743 // store is inside of the reduction region when using by-ref
3806 Module *M = F->getParent();
3807 LLVMContext &Ctx = M->getContext();
3808 Type *IndVarTy = TripCount->getType();
3834 IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
3849 IndVarPHI->addIncoming(Next, Latch);
3858 CL->Header = Header;
3859 CL->Cond = Cond;
3860 CL->Latch = Latch;
3861 CL->Exit = Exit;
3864 CL->assertOK();
3874 BasicBlock *NextBB = BB->getNextNode();
3876 CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
3878 BasicBlock *After = CL->getAfter();
3886 Builder.CreateBr(CL->getPreheader());
3891 BodyGenCB(CL->getBodyIP(), CL->getIndVar());
3894 CL->assertOK();
3904 // Consider the following difficulties (assuming 8-bit signed integers):
3908 // DO I = 100, 0, -128
3911 auto *IndVarTy = cast<IntegerType>(Start->getType());
3912 assert(IndVarTy == Stop->getType() && "Stop type mismatch");
3913 assert(IndVarTy == Step->getType() && "Step type mismatch");
3976 unsigned Bitwidth = Ty->getIntegerBitWidth();
3990 assert(CLI->isValid() && "Requires a valid canonical loop");
3991 assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
3995 Builder.restoreIP(CLI->getPreheaderIP());
4003 Value *IV = CLI->getIndVar();
4004 Type *IVTy = IV->getType();
4010 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
4020 // always iterates from 0 to trip-count with step 1. Note that "init" expects
4022 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4026 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
4044 CLI->setTripCount(TripCount);
4050 CLI->mapIndVar([&](Instruction *OldIV) -> Value * {
4051 Builder.SetInsertPoint(CLI->getBody(),
4052 CLI->getBody()->getFirstInsertionPt());
4058 Builder.SetInsertPoint(CLI->getExit(),
4059 CLI->getExit()->getTerminator()->getIterator());
4068 InsertPointTy AfterIP = CLI->getAfterIP();
4069 CLI->invalidate();
4077 assert(CLI->isValid() && "Requires a valid canonical loop");
4080 LLVMContext &Ctx = CLI->getFunction()->getContext();
4081 Value *IV = CLI->getIndVar();
4082 Value *OrigTripCount = CLI->getTripCount();
4083 Type *IVTy = IV->getType();
4084 assert(IVTy->getIntegerBitWidth() <= 64 &&
4086 Type *InternalIVTy = IVTy->getIntegerBitWidth() <= 32 ? Type::getInt32Ty(Ctx)
4109 Builder.restoreIP(CLI->getPreheaderIP());
4112 // TODO: Detect overflow in ubsan or max-out with current tripcount.
4161 BasicBlock *DispatchBody = DispatchCLI->getBody();
4162 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
4163 BasicBlock *DispatchExit = DispatchCLI->getExit();
4164 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
4165 DispatchCLI->invalidate();
4168 redirectTo(DispatchAfter, CLI->getAfter(), DL);
4169 redirectTo(CLI->getExit(), DispatchLatch, DL);
4173 Builder.restoreIP(CLI->getPreheaderIP());
4177 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4187 CLI->setTripCount(BackcastedChunkTC);
4194 CLI->mapIndVar([&](Instruction *) -> Value * {
4195 Builder.restoreIP(CLI->getBodyIP());
4200 Builder.SetInsertPoint(DispatchExit, DispatchExit->getFirstInsertionPt());
4211 CLI->assertOK();
4214 return {DispatchAfter, DispatchAfter->getFirstInsertionPt()};
4223 unsigned Bitwidth = Ty->getIntegerBitWidth();
4224 Module &M = OMPBuilder->M;
4228 return OMPBuilder->getOrCreateRuntimeFunction(
4231 return OMPBuilder->getOrCreateRuntimeFunction(
4236 return OMPBuilder->getOrCreateRuntimeFunction(
4239 return OMPBuilder->getOrCreateRuntimeFunction(
4244 return OMPBuilder->getOrCreateRuntimeFunction(
4247 return OMPBuilder->getOrCreateRuntimeFunction(
4263 Type *TripCountTy = TripCount->getType();
4264 Module &M = OMPBuilder->M;
4265 IRBuilder<> &Builder = OMPBuilder->Builder;
4278 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
4280 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->end())});
4299 IRBuilder<> &Builder = OMPIRBuilder->Builder;
4300 BasicBlock *Preheader = CLI->getPreheader();
4301 Value *TripCount = CLI->getTripCount();
4307 Preheader->splice(std::prev(Preheader->end()), CLI->getBody(),
4308 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
4313 Builder.restoreIP({Preheader, Preheader->end()});
4314 Preheader->getTerminator()->eraseFromParent();
4315 Builder.CreateBr(CLI->getExit());
4321 CleanUpInfo.EntryBB = CLI->getHeader();
4322 CleanUpInfo.ExitBB = CLI->getExit();
4334 assert((OutlinedFnCallInstruction->getParent() == Preheader) &&
4337 if (OutlinedFnCallInstruction->arg_size() > 1)
4338 LoopBodyArg = OutlinedFnCallInstruction->getArgOperand(1);
4341 OutlinedFnCallInstruction->eraseFromParent();
4348 ToBeDeletedItem->eraseFromParent();
4349 CLI->invalidate();
4361 OI.OuterAllocaBB = CLI->getPreheader();
4362 Function *OuterFn = CLI->getPreheader()->getParent();
4370 OI.EntryBB = CLI->getBody();
4371 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
4375 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
4379 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0, "");
4381 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
4405 /* AllocationBlock */ CLI->getPreheader(),
4419 SmallVector<User *> Users(CLI->getIndVar()->user_begin(),
4420 CLI->getIndVar()->user_end());
4423 if (ParallelRegionBlockSet.count(Inst->getParent())) {
4424 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
4443 return CLI->getAfterIP();
4462 assert(!ChunkSize && "No chunk size with static-chunked schedule");
4485 "schedule type does not support user-defined chunk sizes");
4506 unsigned Bitwidth = Ty->getIntegerBitWidth();
4522 unsigned Bitwidth = Ty->getIntegerBitWidth();
4537 unsigned Bitwidth = Ty->getIntegerBitWidth();
4550 assert(CLI->isValid() && "Requires a valid canonical loop");
4551 assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
4567 Value *IV = CLI->getIndVar();
4568 Type *IVTy = IV->getType();
4573 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
4582 // always iterates from 0 to trip-count with step 1. Note that "init" expects
4584 BasicBlock *PreHeader = CLI->getPreheader();
4585 Builder.SetInsertPoint(PreHeader->getTerminator());
4588 Value *UpperBound = CLI->getTripCount();
4592 BasicBlock *Header = CLI->getHeader();
4593 BasicBlock *Exit = CLI->getExit();
4594 BasicBlock *Cond = CLI->getCond();
4595 BasicBlock *Latch = CLI->getLatch();
4596 InsertPointTy AfterIP = CLI->getAfterIP();
4616 PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
4617 PreHeader->getParent());
4618 // This needs to be 32-bit always, so can't use the IVTy Zero above.
4619 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
4629 // Change PHI-node in loop header to use outer cond rather than preheader,
4631 Instruction *Phi = &Header->front();
4633 PI->setIncomingBlock(0, OuterCond);
4634 PI->setIncomingValue(0, LowerBound);
4636 // Then set the pre-header to jump to the OuterCond
4637 Instruction *Term = PreHeader->getTerminator();
4639 Br->setSuccessor(0, OuterCond);
4644 Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
4648 CI->setOperand(1, UpperBound);
4650 Instruction *Branch = &Cond->back();
4652 assert(BI->getSuccessor(1) == Exit);
4653 BI->setSuccessor(1, OuterCond);
4657 Builder.SetInsertPoint(&Latch->back());
4664 Builder.SetInsertPoint(&Exit->back());
4670 CLI->invalidate();
4687 for (Use &U : BB->uses()) {
4691 if (BBsToErase.count(UseInst->getParent()))
4718 BasicBlock *OrigPreheader = Outermost->getPreheader();
4719 BasicBlock *OrigAfter = Outermost->getAfter();
4720 Function *F = OrigPreheader->getParent();
4726 Loop->collectControlBlocks(OldControlBBs);
4733 Builder.restoreIP(Outermost->getPreheaderIP());
4739 assert(L->isValid() &&
4741 Value *OrigTripCount = L->getTripCount();
4755 OrigPreheader->getNextNode(), OrigAfter, "collapsed");
4761 Builder.restoreIP(Result->getBodyIP());
4763 Value *Leftover = Result->getIndVar();
4766 for (int i = NumLoops - 1; i >= 1; --i) {
4767 Value *OrigTripCount = Loops[i]->getTripCount();
4779 // the control flow, from the leading in-between code, the loop nest body, the
4780 // trailing in-between code, and rejoining the collapsed loop's latch.
4784 BasicBlock *ContinueBlock = Result->getBody();
4800 // the in-between code is and instantiate it only once per thread.
4801 for (size_t i = 0; i < NumLoops - 1; ++i)
4802 ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
4805 ContinueWith(Innermost->getBody(), Innermost->getLatch());
4808 for (size_t i = NumLoops - 1; i > 0; --i)
4809 ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
4812 ContinueWith(Result->getLatch(), nullptr);
4815 redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
4816 redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
4820 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
4826 L->invalidate();
4829 Result->assertOK();
4844 Function *F = OutermostLoop->getBody()->getParent();
4845 BasicBlock *InnerEnter = InnermostLoop->getBody();
4846 BasicBlock *InnerLatch = InnermostLoop->getLatch();
4852 Loop->collectControlBlocks(OldControlBBs);
4860 assert(L->isValid() && "All input loops must be valid canonical loops");
4861 OrigTripCounts.push_back(L->getTripCount());
4862 OrigIndVars.push_back(L->getIndVar());
4872 for (int i = 0; i < NumLoops - 1; ++i) {
4876 BasicBlock *EnterBB = Surrounding->getBody();
4877 BasicBlock *ExitBB = Nested->getHeader();
4883 Builder.restoreIP(OutermostLoop->getPreheaderIP());
4888 Type *IVType = OrigTripCount->getType();
4896 // Unfortunately we cannot just use the roundup-formula
4897 // (tripcount + tilesize - 1)/tilesize
4919 BasicBlock *Enter = OutermostLoop->getPreheader();
4923 BasicBlock *Continue = OutermostLoop->getAfter();
4926 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
4930 Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
4933 redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
4934 redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
4937 Enter = EmbeddedLoop->getBody();
4938 Continue = EmbeddedLoop->getLatch();
4939 OutroInsertBefore = EmbeddedLoop->getLatch();
4956 Builder.SetInsertPoint(Enter->getTerminator());
4963 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
4998 Builder.restoreIP(Result.back()->getBodyIP());
5006 Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
5008 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
5009 OrigIndVar->replaceAllUsesWith(Shift);
5016 L->invalidate();
5020 GenL->assertOK();
5033 LLVMContext &Ctx = BB->getContext();
5038 MDNode *Existing = BB->getTerminator()->getMetadata(LLVMContext::MD_loop);
5040 append_range(NewProperties, drop_begin(Existing->operands(), 1));
5044 BasicBlockID->replaceOperandWith(0, BasicBlockID);
5046 BB->getTerminator()->setMetadata(LLVMContext::MD_loop, BasicBlockID);
5053 assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
5056 BasicBlock *Latch = Loop->getLatch();
5092 Function *F = CanonicalLoop->getFunction();
5099 SplitBefore = CanonicalLoop->getPreheader()->getTerminator();
5114 Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());
5117 BasicBlock *Head = SplitBefore->getParent();
5118 Instruction *HeadOldTerm = Head->getTerminator();
5119 llvm::LLVMContext &C = Head->getContext();
5121 C, NamePrefix + ".if.then", Head->getParent(), Head->getNextNode());
5123 C, NamePrefix + ".if.else", Head->getParent(), CanonicalLoop->getExit());
5129 InsertPointTy IP{BrInstr->getParent(), ++BrInstr->getIterator()};
5132 ThenBlock->replaceSuccessorsPhiUsesWith(Head, ThenBlock);
5139 VMap[CanonicalLoop->getPreheader()] = ElseBlock;
5140 for (BasicBlock *Block : L->getBlocks()) {
5142 NewBB->moveBefore(CanonicalLoop->getExit());
5173 Function *F = CanonicalLoop->getFunction();
5187 Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());
5190 Builder.SetInsertPoint(CanonicalLoop->getPreheader()->getTerminator());
5194 Builder.CreateAlignmentAssumption(F->getDataLayout(),
5204 Value *MappedLatch = VMap.lookup(CanonicalLoop->getLatch());
5224 for (BasicBlock *Block : L->getBlocks()) {
5225 if (Block == CanonicalLoop->getCond() ||
5226 Block == CanonicalLoop->getHeader())
5234 // the memory instructions parallel, because loop-carried
5239 // Add access group metadata to memory-access instructions.
5272 /// Ideally, this would be passed from the front-end to the OpenMPBuilder, but
5281 /// might become worth requiring front-ends to pass on their TargetMachine,
5283 /// have just a single main TargetMachine per translation unit, "target-cpu" and
5284 /// "target-features" that determine the TargetMachine are per-function and can
5288 Module *M = F->getParent();
5290 StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
5291 StringRef Features = F->getFnAttribute("target-features").getValueAsString();
5292 const std::string &Triple = M->getTargetTriple();
5300 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
5305 /// Heuristically determine the best-performant unroll factor for \p CLI. This
5306 /// depends on the target processor. We are re-using the same heuristics as the
5309 Function *F = CLI->getFunction();
5326 [&](const Function &F) { return TM->getTargetTransformInfo(F); });
5340 Loop *L = LI.getLoopFor(CLI->getHeader());
5386 for (BasicBlock *BB : L->blocks()) {
5390 Ptr = Load->getPointerOperand();
5392 Ptr = Store->getPointerOperand();
5396 Ptr = Ptr->stripPointerCasts();
5399 if (Alloca->getParent() == &F->getEntryBlock())
5441 Function *F = Loop->getFunction();
5442 LLVMContext &Ctx = F->getContext();
5444 // If the unrolled loop is not used for another loop-associated directive, it
5475 Type *IndVarTy = Loop->getIndVarType();
5477 // Apply partial unrolling by tiling the loop by the unroll-factor, then fully
5480 ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor,
5500 (*UnrolledCLI)->assertOK();
5632 [](Value *SV) { return SV->getType()->isIntegerTy(64); }) &&
5642 ArgsBase->setAlignment(Align(8));
5645 // Store the index value with offset in depend vector.
5650 STInst->setAlignment(Align(8));
5712 Instruction *SplitPos = EntryBB->getTerminator();
5715 BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
5717 EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");
5719 Builder.SetInsertPoint(EntryBB->getTerminator());
5727 auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
5728 assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
5729 FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
5732 assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
5738 assert(SplitPos->getParent() == ExitBB &&
5741 BasicBlock *ExitPredBB = SplitPos->getParent();
5744 SplitPos->eraseFromParent();
5763 Function *CurFn = EntryBB->getParent();
5764 CurFn->insert(std::next(EntryBB->getIterator()), ThenBB);
5767 // branch (If-stmt)
5768 Instruction *EntryBBTI = EntryBB->getTerminator();
5770 EntryBBTI->removeFromParent();
5773 UI->eraseFromParent();
5774 Builder.SetInsertPoint(ThenBB->getTerminator());
5777 return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
5797 Instruction *FiniBBTI = FiniBB->getTerminator();
5807 ExitCall->removeFromParent();
5810 return IRBuilder<>::InsertPoint(ExitCall->getParent(),
5811 ExitCall->getIterator());
5835 Function *CurFn = OMP_Entry->getParent();
5842 if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
5843 CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
5845 OMP_Entry->getTerminator()->eraseFromParent();
5908 Device = ConstantInt::get(Int32, -1);
5936 Device = ConstantInt::get(Int32, -1);
5964 Device = ConstantInt::get(Int32, -1);
6016 Function *Kernel = Builder.GetInsertBlock()->getParent();
6018 // Manifest the launch configuration in the metadata matching the kernel
6039 StringRef KernelName = Kernel->getName();
6046 const DataLayout &DL = Fn->getDataLayout();
6056 DynamicEnvironmentGV->setVisibility(GlobalValue::ProtectedVisibility);
6059 DynamicEnvironmentGV->getType() == DynamicEnvironmentPtr
6089 KernelEnvironmentGV->setVisibility(GlobalValue::ProtectedVisibility);
6092 KernelEnvironmentGV->getType() == KernelEnvironmentPtr
6096 Value *KernelLaunchEnvironment = Kernel->getArg(0);
6101 ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
6105 // if (ThreadKind == -1)
6111 BasicBlock *CheckBB = UI->getParent();
6112 BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");
6115 CheckBB->getContext(), "worker.exit", CheckBB->getParent());
6119 auto *CheckBBTI = CheckBB->getTerminator();
6121 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
6123 CheckBBTI->eraseFromParent();
6124 UI->eraseFromParent();
6128 return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
6145 Function *Kernel = Builder.GetInsertBlock()->getParent();
6147 StringRef KernelName = Kernel->getName();
6154 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
6161 KernelEnvironmentGV->setInitializer(NewInitializer);
6167 for (auto *Op : MD->operands()) {
6168 if (Op->getNumOperands() != 3)
6170 auto *KernelOp = dyn_cast<ConstantAsMetadata>(Op->getOperand(0));
6171 if (!KernelOp || KernelOp->getValue() != &Kernel)
6173 auto *Prop = dyn_cast<MDString>(Op->getOperand(1));
6174 if (!Prop || Prop->getString() != Name)
6186 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
6187 int32_t OldLimit = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
6188 ExistingOp->replaceOperandWith(
6190 OldVal->getValue()->getType(),
6201 MD->addOperand(MDNode::get(Ctx, MDVals));
6211 const auto &Attr = Kernel.getFnAttribute("amdgpu-flat-work-group-size");
6225 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
6226 int32_t UB = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
6238 Kernel.addFnAttr("amdgpu-flat-work-group-size",
6258 Kernel.addFnAttr("amdgpu-max-num-workgroups", llvm::utostr(LB) + ",1,1");
6266 OutlinedFn->setLinkage(GlobalValue::WeakODRLinkage);
6268 OutlinedFn->setDSOLocal(false);
6269 OutlinedFn->setVisibility(GlobalValue::ProtectedVisibility);
6271 OutlinedFn->setCallingConv(CallingConv::AMDGPU_KERNEL);
6372 !MapInfo->Names.empty());
6425 emitOffloadingArraysArgument(Builder, RTArgs, Info, !MapInfo->Names.empty(),
6557 // or i64. This assumes 64-bit address spaces/pointers.
6559 ParameterTypes.push_back(Arg->getType()->isPointerTy()
6560 ? Arg->getType()
6564 ParameterTypes.push_back(Arg->getType());
6570 Builder.GetInsertBlock()->getModule());
6600 Builder.SetInsertPoint(EntryBB->getFirstNonPHI());
6603 Builder.SetInsertPoint(UserCodeEntryBB->getFirstNonPHIOrDbg());
6608 ? make_range(Func->arg_begin() + 1, Func->arg_end())
6609 : Func->args();
6627 // from MLIR to LLVM-IR and the MLIR lowering may still require the original
6633 for (User *User : make_early_inc_range(Input->users()))
6635 if (Instr->getFunction() == Func)
6636 Instr->replaceUsesOfWith(Input, InputCopy);
6657 // purposes. If we have mapped a segment that requires a GEP into the 0-th
6699 Function *KernelLaunchFunction = StaleCI->getCalledFunction();
6717 OpenMPIRBuilder::InsertPointTy IP(StaleCI->getParent(),
6718 StaleCI->getIterator());
6719 LLVMContext &Ctx = StaleCI->getParent()->getContext();
6728 Builder.GetInsertBlock()->getModule());
6729 ProxyFn->getArg(0)->setName("thread.id");
6730 ProxyFn->getArg(1)->setName("task");
6736 bool HasShareds = StaleCI->arg_size() > 1;
6741 assert((!HasShareds || (StaleCI->arg_size() == 2)) &&
6744 auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
6749 dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
6753 Value *TaskT = ProxyFn->getArg(1);
6754 Value *ThreadId = ProxyFn->getArg(0);
6763 NewArgStructAlloca, NewArgStructAlloca->getAlign(), LoadShared,
6764 LoadShared->getPointerAlignment(M.getDataLayout()), SharedsSize);
6798 // --------------------------------------------------
6804 // --------------------------------------------------
6808 // --------------------------------------------------
6843 // memcpy(proxy_target_task->shareds, %structArg, sizeof(structArg))
6888 TargetTaskAllocaBB->begin());
6889 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->begin());
6919 bool HasShareds = StaleCI->arg_size() > 1;
6938 // Arguments - `loc_ref` (Ident) and `gtid` (ThreadID)
6942 // Argument - `sizeof_kmp_task_t` (TaskSize)
6949 // Argument - `sizeof_shareds` (SharedsSize)
6954 auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
6959 dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
6966 // Argument - `flags`
6983 Value *Shareds = StaleCI->getArgOperand(1);
6984 Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
6992 // ---------------------------------------------------------------
6997 // ---------------------------------------------------------------
7024 CI->setDebugLoc(StaleCI->getDebugLoc());
7027 // HasNoWait - meaning the task may be deferred. Call
7043 StaleCI->eraseFromParent();
7045 [](Instruction *I) { I->eraseFromParent(); });
7052 << *(Builder.GetInsertBlock()->getParent()->getParent())
7078 [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy {
7153 llvm::raw_svector_ostream OS(Buffer);
7156 OS << Sep << Part;
7159 return OS.str().str();
7173 assert(Elem.second->getValueType() == Ty &&
7180 auto Linkage = this->M.getTargetTriple().rfind("wasm32") == 0
7190 GV->setAlignment(std::max(TypeAlign, PtrAlign));
7206 Constant::getNullValue(PointerType::getUnqual(BasePtr->getContext()));
7208 Builder.CreateGEP(BasePtr->getType(), Null, Builder.getInt32(1));
7219 M, MaptypesArrayInit->getType(),
7222 MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
7268 Constant::getNullValue(PointerType::getUnqual(Int8Ptr->getContext()));
7325 // If there is no user-defined mapper, set the mapper array to nullptr to
7345 // uint64_t offset;
7356 // the size of Components, however, the size of offset, count, and stride is
7357 // equal to the size of base declaration that is non-contiguous.
7360 // non-contiguous.
7369 unsigned RevIdx = EE - II - 1;
7371 DimsAddr->getAllocatedType(), DimsAddr,
7373 // Offset
7377 M.getDataLayout().getPrefTypeAlign(OffsetLVal->getType()));
7382 M.getDataLayout().getPrefTypeAlign(CountLVal->getType()));
7387 M.getDataLayout().getPrefTypeAlign(CountLVal->getType()));
7464 new GlobalVariable(M, SizesArrayInit->getType(), /*isConstant=*/true,
7466 SizesArrayGbl->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
7476 Buffer->setAlignment(OffloadSizeAlign);
7479 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->getType()),
7483 Buffer->getAllocationSize(M.getDataLayout())->getFixedValue()));
7579 MappersArray->getAllocatedType(), MappersArray,
7582 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->getType()));
7594 if (!CurBB || CurBB->getTerminator()) {
7598 // Otherwise, create a fall-through branch.
7612 if (IsFinished && BB->use_empty()) {
7613 BB->eraseFromParent();
7619 if (CurBB && CurBB->getParent())
7620 CurFn->insert(std::next(CurBB->getIterator()), BB);
7622 CurFn->insert(CurFn->end(), BB);
7632 auto CondConstant = CI->getSExtValue();
7640 Function *CurFn = Builder.GetInsertBlock()->getParent();
7704 // do nothing - leave silently.
7730 assert(X.Var->getType()->isPointerTy() &&
7733 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
7734 XElemTy->isPointerTy()) &&
7739 if (XElemTy->isIntegerTy()) {
7742 XLD->setAtomic(AO);
7747 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
7750 XLoad->setAtomic(AO);
7751 if (XElemTy->isFloatingPointTy()) {
7769 assert(X.Var->getType()->isPointerTy() &&
7772 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
7773 XElemTy->isPointerTy()) &&
7776 if (XElemTy->isIntegerTy()) {
7778 XSt->setAtomic(AO);
7782 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
7786 XSt->setAtomic(AO);
7802 Type *XTy = X.Var->getType();
7803 assert(XTy->isPointerTy() &&
7806 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
7807 XElemTy->isPointerTy()) &&
7857 // TODO: handle the case where XElemTy is not byte-sized or not a power of 2
7875 emitRMWOp &= XElemTy->isIntegerTy();
7889 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
7891 Builder.CreateLoad(IntCastTy, X, X->getName() + ".atomic.load");
7892 OldVal->setAtomic(AO);
7894 // | /---\
7896 // | \---/
7899 Instruction *CurBBTI = CurBB->getTerminator();
7902 CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
7903 BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
7904 X->getName() + ".atomic.cont");
7905 ContBB->getTerminator()->eraseFromParent();
7908 NewAtomicAddr->setName(X->getName() + "x.new.val");
7910 llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
7911 PHI->addIncoming(OldVal, CurBB);
7912 bool IsIntTy = XElemTy->isIntegerTy();
7915 if (XElemTy->isFloatingPointTy()) {
7917 X->getName() + ".atomic.fltCast");
7920 X->getName() + ".atomic.ptrCast");
7931 Result->setVolatile(VolatileX);
7934 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
7942 dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
7943 CurBBTI->eraseFromParent();
7962 Type *XTy = X.Var->getType();
7963 assert(XTy->isPointerTy() &&
7966 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
7967 XElemTy->isPointerTy()) &&
8007 assert(X.Var->getType()->isPointerTy() &&
8011 assert(V.Var->getType()->isPointerTy() && "v.var must be of pointer type");
8015 bool IsInteger = E->getType()->isIntegerTy();
8021 IntegerType::get(M.getContext(), X.ElemTy->getScalarSizeInBits());
8035 assert(OldValue->getType() == V.ElemTy &&
8042 // CurBB----
8048 // ExitBB <-
8052 Instruction *CurBBTI = CurBB->getTerminator();
8054 BasicBlock *ExitBB = CurBB->splitBasicBlock(
8055 CurBBTI, X.Var->getName() + ".atomic.exit");
8056 BasicBlock *ContBB = CurBB->splitBasicBlock(
8057 CurBB->getTerminator(), X.Var->getName() + ".atomic.cont");
8058 ContBB->getTerminator()->eraseFromParent();
8059 CurBB->getTerminator()->eraseFromParent();
8068 dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
8069 CurBBTI->eraseFromParent();
8083 assert(R.Var->getType()->isPointerTy() &&
8085 assert(R.ElemTy->isIntegerTy() && "r must be of integral type");
8186 Function *CurrentFunction = Builder.GetInsertBlock()->getParent();
8189 BasicBlock &OuterAllocaBB = CurrentFunction->getEntryBlock();
8192 Builder.SetInsertPoint(BodyBB, BodyBB->begin());
8222 "if lowerbound is non-null, then upperbound must also be non-null "
8232 assert(IfExpr->getType()->isIntegerTy() &&
8236 if (IfExpr->getType() != Int1)
8238 ConstantInt::get(IfExpr->getType(), 0));
8256 InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin());
8257 InsertPointTy CodeGenIP(BodyBB, BodyBB->begin());
8288 OutlinedFn.getArg(0)->setName("global.tid.ptr");
8289 OutlinedFn.getArg(1)->setName("bound.tid.ptr");
8291 OutlinedFn.getArg(2)->setName("data");
8294 assert(StaleCI && "Error while outlining - no CallInst user found for the "
8298 Ident, Builder.getInt32(StaleCI->arg_size() - 2), &OutlinedFn};
8300 Args.push_back(StaleCI->getArgOperand(2));
8306 [](Instruction *I) { I->eraseFromParent(); });
8315 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
8328 M, MapNamesArrayInit->getType(),
8378 M, ID, Name.empty() ? Addr->getName() : Name, Size, Flags, /*Data=*/0,
8388 Module &M = *(Fn->getParent());
8398 MD->addOperand(MDNode::get(Ctx, MDVals));
8401 Fn->addFnAttr(Attribute::get(Ctx, "kernel"));
8403 Fn->addFnAttr("uniform-work-group-size", "true");
8404 Fn->addFnAttr(Attribute::MustProgress);
8436 // - Entry 0 -> Kind of this type of metadata (0).
8437 // - Entry 1 -> Device ID of the file where the entry was identified.
8438 // - Entry 2 -> File ID of the file where the entry was identified.
8439 // - Entry 3 -> Mangled name of the function where the entry was
8441 // - Entry 4 -> Line in the file where the entry was identified.
8442 // - Entry 5 -> Count of regions at this DeviceID/FilesID/Line.
8443 // - Entry 6 -> Order the entry was created.
8455 MD->addOperand(MDNode::get(C, Ops));
8467 // - Entry 0 -> Kind of this type of metadata (1).
8468 // - Entry 1 -> Mangled name of the variable.
8469 // - Entry 2 -> Declare target kind.
8470 // - Entry 3 -> Order the entry was created.
8480 MD->addOperand(MDNode::get(C, Ops));
8491 if (!CE->getID() || !CE->getAddress()) {
8500 createOffloadEntry(CE->getID(), CE->getAddress(),
8501 /*Size=*/0, CE->getFlags(),
8508 CE->getFlags());
8514 if (!CE->getAddress()) {
8518 // The variable has no definition - no need to add the entry.
8519 if (CE->getVarSize() == 0)
8523 assert(((Config.isTargetDevice() && !CE->getAddress()) ||
8524 (!Config.isTargetDevice() && CE->getAddress())) &&
8528 if (!CE->getAddress()) {
8540 if (auto *GV = dyn_cast<GlobalValue>(CE->getAddress()))
8541 if ((GV->hasLocalLinkage() || GV->hasHiddenVisibility()) &&
8548 createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
8549 Flags, CE->getLinkage(), CE->getVarName());
8551 createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
8552 Flags, CE->getLinkage());
8561 // TODO: This reduces the offloading entries to a 32-bit integer. Offloading
8562 // entries should be redesigned to better suit this use-case.
8574 raw_svector_ostream OS(Name);
8575 OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
8578 OS << "_" << Count;
8606 unsigned Offset = 0;
8611 Offset++;
8612 return Offset;
8662 raw_svector_ostream OS(PtrName);
8663 OS << MangledName;
8665 OS << format("_%x", EntryInfo.FileID);
8666 OS << "_decl_tgt_ref_ptr";
8676 GV->setLinkage(GlobalValue::WeakAnyLinkage);
8680 GV->setInitializer(GlobalInitializer());
8682 GV->setInitializer(GlobalValue);
8726 M.getDataLayout().getTypeSizeInBits(LlvmVal->getValueType()), 8);
8729 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->getLinkage();
8735 // Do not create a "ref-variable" if the original is not also available
8744 getOrCreateInternalVariable(Addr->getType(), RefName);
8746 GvAddrRef->setConstant(true);
8747 GvAddrRef->setLinkage(GlobalValue::InternalLinkage);
8748 GvAddrRef->setInitializer(Addr);
8759 VarName = (Addr) ? Addr->getName() : "";
8766 VarName = (Addr) ? Addr->getName() : "";
8786 for (MDNode *MN : MD->operands()) {
8788 auto *V = cast<ConstantAsMetadata>(MN->getOperand(Idx));
8789 return cast<ConstantInt>(V->getValue())->getZExtValue();
8793 auto *V = cast<MDString>(MN->getOperand(Idx));
8794 return V->getString();
8838 Ctx, parseBitcodeFile(Buf.get()->getMemBufferRef(), Ctx));
8848 //===----------------------------------------------------------------------===//
8850 //===----------------------------------------------------------------------===//
8863 return It->second;
8891 if (OMPBuilder->Config.isTargetDevice()) {
8924 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
8946 if (OMPBuilder->Config.isTargetDevice()) {
8990 //===----------------------------------------------------------------------===//
8992 //===----------------------------------------------------------------------===//
9016 Instruction *CmpI = &getCond()->front();
9018 CmpI->setOperand(1, TripCount);
9035 for (Use &U : OldIV->uses()) {
9039 if (User->getParent() == getCond())
9041 if (User->getParent() == getLatch())
9051 U->set(NewIV);
9068 // Verify standard control-flow we use for OpenMP loops.
9070 assert(isa<BranchInst>(Preheader->getTerminator()) &&
9072 assert(Preheader->getSingleSuccessor() == Header &&
9076 assert(isa<BranchInst>(Header->getTerminator()) &&
9078 assert(Header->getSingleSuccessor() == Cond &&
9082 assert(Cond->getSinglePredecessor() == Header &&
9085 assert(isa<BranchInst>(Cond->getTerminator()) &&
9089 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
9091 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
9095 assert(Body->getSinglePredecessor() == Cond &&
9097 assert(!isa<PHINode>(Body->front()));
9100 assert(isa<BranchInst>(Latch->getTerminator()) &&
9102 assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
9105 assert(Latch->getSinglePredecessor() != nullptr);
9106 assert(!isa<PHINode>(Latch->front()));
9109 assert(isa<BranchInst>(Exit->getTerminator()) &&
9111 assert(Exit->getSingleSuccessor() == After &&
9115 assert(After->getSinglePredecessor() == Exit &&
9117 assert(After->empty() || !isa<PHINode>(After->front()));
9121 assert(isa<IntegerType>(IndVar->getType()) &&
9123 assert(cast<PHINode>(IndVar)->getParent() == Header &&
9125 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
9127 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
9128 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
9130 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
9131 assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
9132 assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
9133 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
9134 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
9135 ->isOne());
9139 assert(IndVar->getType() == TripCount->getType() &&
9142 auto *CmpI = cast<CmpInst>(&Cond->front());
9143 assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
9144 "Exit condition must be a signed less-than comparison");
9145 assert(CmpI->getOperand(0) == IndVar &&
9147 assert(CmpI->getOperand(1) == TripCount &&