Lines Matching +full:actions +full:- +full:builder

1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
142 /// this is a not-yet-implemented feature.
213 result = todo("parallelization-level");
350 /// normal operations in the builder.
352 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
369 // confusion. Create a new BasicBlock for the Builder and use the entry block
373 if (builder.GetInsertBlock() ==
374 &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
375 assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
378 builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
379 builder.GetInsertBlock()->getNextNode());
380 builder.CreateBr(entryBB);
381 builder.SetInsertPoint(entryBB);
385 builder.GetInsertBlock()->getParent()->getEntryBlock();
392 /// region, and a branch from any block with a successor-less OpenMP terminator
396 Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
400 splitBB(builder, true, "omp.region.cont");
401 llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
403 llvm::LLVMContext &llvmContext = builder.getContext();
406 llvmContext, blockName, builder.GetInsertBlock()->getParent(),
407 builder.GetInsertBlock()->getNextNode());
411 llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
422 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
424 moduleTranslation.convertType(yield->getOperand(i).getType()));
428 assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
430 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
432 moduleTranslation.convertType(yield->getOperand(i).getType());
449 llvm::IRBuilderBase::InsertPointGuard guard(builder);
450 continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
451 builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
453 continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
462 // converted region (regions are single-entry).
463 if (bb->isEntryBlock()) {
464 assert(sourceTerminator->getNumSuccessors() == 1 &&
466 assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
468 sourceTerminator->setSuccessor(0, llvmBB);
471 llvm::IRBuilderBase::InsertPointGuard guard(builder);
473 moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder)))
479 // here to avoid relying on inter-function communication through the
482 // in the same code that handles the region-owning operation.
483 Operation *terminator = bb->getTerminator();
485 builder.CreateBr(continuationBlock);
487 for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
488 (*continuationBlockPHIs)[i]->addIncoming(
489 moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
505 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
549 return op->emitError() << "cannot ignore nested wrapper";
579 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
590 builder.restoreIP(codeGenIP);
591 return convertOmpOpRegions(region, "omp.masked.region", builder,
596 // TODO: Perform finalization actions for variables. This has to be
604 llvm::LLVMContext &llvmContext = builder.getContext();
609 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
611 moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB,
617 builder.restoreIP(*afterIP);
623 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
634 builder.restoreIP(codeGenIP);
635 return convertOmpOpRegions(region, "omp.master.region", builder,
640 // TODO: Perform finalization actions for variables. This has to be
644 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
646 moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB,
652 builder.restoreIP(*afterIP);
658 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
669 builder.restoreIP(codeGenIP);
670 return convertOmpOpRegions(region, "omp.critical.region", builder,
675 // TODO: Perform finalization actions for variables. This has to be
679 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
686 // non-null
696 moduleTranslation.getOpenMPBuilder()->createCritical(
702 builder.restoreIP(*afterIP);
715 privatizations.reserve(privatizations.size() + attr->size());
716 for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
731 for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
739 /// the current insertion point of `builder`. The operations of the entry block
744 Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
750 // Special case for single-block regions that don't create additional blocks:
754 builder.GetInsertBlock()->empty() ? nullptr
755 : &builder.GetInsertBlock()->back();
757 if (potentialTerminator && potentialTerminator->isTerminator())
758 potentialTerminator->removeFromParent();
759 moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
762 region.front(), /*ignoreArguments=*/true, builder)))
775 if (potentialTerminator && potentialTerminator->isTerminator()) {
776 llvm::BasicBlock *block = builder.GetInsertBlock();
777 if (block->empty()) {
783 potentialTerminator->insertInto(block, block->begin());
785 potentialTerminator->insertAfter(&block->back());
794 convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis);
801 builder.SetInsertPoint(*continuationBlock,
802 (*continuationBlock)->getFirstInsertionPt());
819 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
820 /// reduction declaration. The generator uses `builder` but ignores its
823 makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
825 // The lambda is mutable because we need access to non-const methods of decl
826 // (which aren't actually mutating it), and we must capture decl by-value to
832 -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
835 builder.restoreIP(insertPoint);
838 "omp.reduction.nonatomic.body", builder,
844 return builder.saveIP();
849 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
850 /// given reduction declaration. The generator uses `builder` but ignores its
855 llvm::IRBuilderBase &builder,
860 // The lambda is mutable because we need access to non-const methods of decl
861 // (which aren't actually mutating it), and we must capture decl by-value to
866 -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
869 builder.restoreIP(insertPoint);
872 "omp.reduction.atomic.body", builder,
877 return builder.saveIP();
884 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
906 findAllocaInsertPoint(builder, moduleTranslation);
907 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
908 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
917 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
928 builder.restoreIP(codeGenIP);
929 return convertOmpOpRegions(region, "omp.ordered.region", builder,
934 // TODO: Perform finalization actions for variables. This has to be
938 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
940 moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
946 builder.restoreIP(*afterIP);
967 llvm::IRBuilderBase &builder,
975 llvm::IRBuilderBase::InsertPointGuard guard(builder);
976 builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
989 builder, moduleTranslation, &phis)))
994 builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
998 llvm::Value *var = builder.CreateAlloca(
1007 "allocaction is implicit for by-val reduction");
1008 llvm::Value *var = builder.CreateAlloca(
1044 /// `builder`'s insertion point is where the user wants the `init` regions to be
1046 /// `init` regions. It also leaves the `builder`'s insertion point in a state
1047 /// where the user can continue the code-gen directly afterwards.
1051 llvm::IRBuilderBase &builder,
1062 llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
1064 latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
1065 builder.restoreIP(allocaIP);
1073 // TODO: remove after all users of by-ref are updated to use the alloc
1076 byRefVars[i] = builder.CreateAlloca(
1081 if (initBlock->empty() || initBlock->getTerminator() == nullptr)
1082 builder.SetInsertPoint(initBlock);
1084 builder.SetInsertPoint(initBlock->getTerminator());
1089 builder.CreateStore(data, addr);
1102 "omp.reduction.neutral", builder,
1109 if (builder.GetInsertBlock()->empty() ||
1110 builder.GetInsertBlock()->getTerminator() == nullptr)
1111 builder.SetInsertPoint(builder.GetInsertBlock());
1113 builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1120 // TODO: this path can be removed once all users of by-ref are updated to
1125 builder.CreateStore(phis[0], byRefVars[i]);
1131 // for by-ref case the store is inside of the reduction region
1132 builder.CreateStore(phis[0], privateReductionVariables[i]);
1137 // different mapping if this reduction declaration is re-used for a
1148 T loop, llvm::IRBuilderBase &builder,
1159 makeReductionGen(reductionDecls[i], builder, moduleTranslation));
1161 makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
1186 llvm::IRBuilderBase &builder, StringRef regionName,
1189 if (cleanupRegion->empty())
1193 Block &entry = cleanupRegion->front();
1196 builder.GetInsertBlock()->empty() ? nullptr
1197 : &builder.GetInsertBlock()->back();
1198 if (potentialTerminator && potentialTerminator->isTerminator())
1199 builder.SetInsertPoint(potentialTerminator);
1202 ? builder.CreateLoad(
1209 if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
1213 // clear block argument mapping in case it needs to be re-created with a
1223 OP op, llvm::IRBuilderBase &builder,
1239 collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
1246 llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1247 builder.SetInsertPoint(tempTerminator);
1249 ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
1255 if (!contInsertPoint->getBlock())
1256 return op->emitOpError() << "failed to convert reductions";
1259 ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
1264 tempTerminator->eraseFromParent();
1265 builder.restoreIP(*afterIP);
1274 moduleTranslation, builder,
1288 OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
1300 if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
1306 return initReductionVars(op, reductionArgs, builder, moduleTranslation,
1320 findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
1323 if (mappedPrivateVars == nullptr || !mappedPrivateVars->contains(privateVar))
1341 return builder.CreateLoad(moduleTranslation.convertType(privVarType),
1351 allocatePrivateVars(llvm::IRBuilderBase &builder,
1361 llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
1363 allocaTerminator->getIterator()),
1366 llvm::IRBuilderBase::InsertPointGuard guard(builder);
1369 llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
1370 builder.SetInsertPoint(allocaTerminator);
1371 assert(allocaTerminator->getNumSuccessors() == 1 &&
1374 llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);
1377 // They read from their block argument (amongst other non-alloca things).
1379 // function it places the loads for live in-values (such as these block
1383 // of the live-in values they are using. Fix this by adding a latealloc
1385 // mixing non-alloca code with allocas).
1390 privAllocBlock = splitBB(builder, true, "omp.private.latealloc");
1397 mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
1401 // in-place convert the private allocation region
1405 // allocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca() so it goes before
1408 builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1410 builder.SetInsertPoint(privAllocBlock->getTerminator());
1414 builder, moduleTranslation, &phis)))
1424 // re-created with a different source for another use of the same
1432 initFirstPrivateVars(llvm::IRBuilderBase &builder,
1438 llvm::IRBuilderBase::InsertPointGuard guard(builder);
1449 assert(afterAllocas->getSinglePredecessor());
1452 builder.SetInsertPoint(afterAllocas->getSinglePredecessor()->getTerminator());
1454 splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1455 builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
1473 // in-place convert copy region
1474 builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1475 if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
1482 // re-created with different sources for reuse of the same reduction
1491 cleanupPrivateVars(llvm::IRBuilderBase &builder,
1503 privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1512 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
1529 findAllocaInsertPoint(builder, moduleTranslation);
1539 sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
1558 auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
1560 builder.restoreIP(codeGenIP);
1575 return convertOmpOpRegions(region, "omp.section.region", builder,
1582 // No sections within omp.sections operation - skip generation. This situation
1590 // TODO: Perform appropriate actions according to the data-sharing
1595 -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
1600 // TODO: Perform finalization actions for variables. This has to be
1604 allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1605 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1607 moduleTranslation.getOpenMPBuilder()->createSections(
1614 builder.restoreIP(*afterIP);
1617 return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation,
1624 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
1627 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1633 builder.restoreIP(codegenIP);
1635 builder, moduleTranslation)
1654 moduleTranslation.getOpenMPBuilder()->createSingle(
1661 builder.restoreIP(*afterIP);
1667 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
1676 builder.restoreIP(codegenIP);
1677 return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
1698 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1700 moduleTranslation.getOpenMPBuilder()->createTeams(
1706 builder.restoreIP(*afterIP);
1716 for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) {
1738 llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
1745 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
1764 InsertPointTy codegenIP) -> llvm::Error {
1771 builder, moduleTranslation, privateBlockArgs, privateDecls,
1776 if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
1782 builder.restoreIP(codegenIP);
1784 taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
1788 builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
1790 if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
1802 findAllocaInsertPoint(builder, moduleTranslation);
1803 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1805 moduleTranslation.getOpenMPBuilder()->createTask(
1816 builder.restoreIP(*afterIP);
1822 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
1829 builder.restoreIP(codegenIP);
1831 builder, moduleTranslation)
1835 InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1836 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1838 moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
1844 builder.restoreIP(*afterIP);
1849 convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
1854 moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP());
1860 convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
1876 llvm::Type *ivType = step->getType();
1881 chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
1899 findAllocaInsertPoint(builder, moduleTranslation);
1905 builder, moduleTranslation, privateBlockArgs, privateDecls,
1917 if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
1923 if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
1928 assert(afterAllocas.get()->getSinglePredecessor());
1929 if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
1931 afterAllocas.get()->getSinglePredecessor(),
1950 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1956 llvm::Value *iv) -> llvm::Error {
1966 if (loopInfos.size() != loopOp.getNumLoops() - 1)
1970 builder.restoreIP(ip);
1971 return convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder,
1996 computeIP = loopInfos.front()->getPreheaderIP();
2000 ompBuilder->createCanonicalLoop(
2012 llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
2014 ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
2016 allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
2024 ompBuilder->applyWorkshareLoop(
2037 builder.restoreIP(afterIP);
2040 if (failed(createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
2045 return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
2051 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
2081 InsertPointTy codeGenIP) -> llvm::Error {
2083 builder, moduleTranslation, privateBlockArgs, privateDecls,
2096 allocaIP.getBlock()->getTerminator()->getIterator());
2099 opInst, reductionArgs, builder, moduleTranslation, allocaIP,
2104 if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
2109 assert(afterAllocas.get()->getSinglePredecessor());
2110 builder.restoreIP(codeGenIP);
2113 initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
2114 afterAllocas.get()->getSinglePredecessor(),
2132 opInst.getRegion(), "omp.par.region", builder, moduleTranslation);
2142 collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
2147 builder.SetInsertPoint((*regionBlock)->getTerminator());
2150 llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
2151 builder.SetInsertPoint(tempTerminator);
2154 ompBuilder->createReductions(builder.saveIP(), allocaIP,
2159 if (!contInsertPoint->getBlock())
2162 tempTerminator->eraseFromParent();
2163 builder.restoreIP(*contInsertPoint);
2176 // TODO: Perform finalization actions for variables. This has to be
2178 auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error {
2179 InsertPointTy oldIP = builder.saveIP();
2180 builder.restoreIP(codeGenIP);
2191 moduleTranslation, builder, "omp.reduction.cleanup")))
2195 if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
2199 builder.restoreIP(oldIP);
2216 findAllocaInsertPoint(builder, moduleTranslation);
2217 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2220 ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
2226 builder.restoreIP(*afterIP);
2244 convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
2265 findAllocaInsertPoint(builder, moduleTranslation);
2266 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2269 builder, moduleTranslation, privateBlockArgs, privateDecls,
2278 llvm::Value *iv) -> llvm::Error {
2288 if (loopInfos.size() != loopOp.getNumLoops() - 1)
2292 builder.restoreIP(ip);
2293 return convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder,
2319 computeIP = loopInfos.front()->getPreheaderIP();
2323 ompBuilder->createCanonicalLoop(
2334 llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
2336 ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
2340 simdlen = builder.getInt64(simdlenVar.value());
2344 safelen = builder.getInt64(safelenVar.value());
2348 llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
2354 llvm::Type *ty = llvmVal->getType();
2356 alignment = builder.getInt64(intAttr.getInt());
2357 assert(ty->isPointerTy() && "Invalid type for aligned variable");
2359 auto curInsert = builder.saveIP();
2360 builder.SetInsertPoint(sourceBlock->getTerminator());
2361 llvmVal = builder.CreateLoad(ty, llvmVal);
2362 builder.restoreIP(curInsert);
2366 ompBuilder->applySimd(loopInfo, alignedVars,
2372 builder.restoreIP(afterIP);
2374 return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
2401 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
2409 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2420 builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
2426 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
2434 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2441 builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
2464 llvm::IRBuilderBase &builder,
2511 llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
2514 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
2515 if (failed(moduleTranslation.convertBlock(bb, true, builder)))
2526 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
2527 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2529 ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
2536 builder.restoreIP(*afterIP);
2542 llvm::IRBuilderBase &builder,
2603 llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
2609 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
2610 if (failed(moduleTranslation.convertBlock(bb, true, builder)))
2621 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
2622 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2624 ompBuilder->createAtomicCapture(
2631 builder.restoreIP(*afterIP);
2638 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
2640 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2661 if (!ompBuilder->Config.isTargetDevice()) {
2662 llvm::Type *type = globalValue->getValueType();
2664 builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
2666 llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
2667 llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
2713 auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
2730 auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
2754 addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
2767 ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
2772 return moduleTranslation.getLLVMModule()->getNamedValue(
2775 return moduleTranslation.getLLVMModule()->getNamedValue(
2826 // e.g. given a 1-D array of ints, we will calculate the size from the integer
2835 llvm::Type *baseType, llvm::IRBuilderBase &builder,
2844 llvm::Value *elementCount = builder.getInt64(1);
2849 // map.info's bounds is: (elemCount * [UB - LB] + 1), later we
2852 elementCount = builder.CreateMul(
2854 builder.CreateAdd(
2855 builder.CreateSub(
2858 builder.getInt64(1)));
2871 // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
2873 return builder.CreateMul(elementCount,
2874 builder.getInt64(underlyingTypeSzInBits / 8));
2878 return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
2884 llvm::IRBuilderBase &builder, const ArrayRef<Value> &useDevPtrOperands = {},
2925 mapData.BaseType.back(), builder, moduleTranslation));
2969 mapData.Sizes.push_back(builder.getInt64(0));
3042 /// NOTE: which while specified in row-major order it currently needs to be
3044 /// opposed to C++'s row-major, hence the backwards processing where order is
3052 llvm::IRBuilderBase &builder, bool isArrayTy,
3063 // I believe leans more towards Fortran's column-major in memory.
3065 idx.push_back(builder.getInt64(0));
3066 for (int i = bounds.size() - 1; i >= 0; --i) {
3086 // - First row/column we move by 1 for each index increment
3087 // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
3089 // - Third row/column we would move by 10 (second row/column) *
3091 std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
3095 dimensionIndexSizeOffset.push_back(builder.CreateMul(
3097 dimensionIndexSizeOffset[i - 1]));
3105 for (int i = bounds.size() - 1; i >= 0; --i) {
3109 idx.emplace_back(builder.CreateMul(
3113 idx.back() = builder.CreateAdd(
3114 idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
3140 LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3152 mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3156 // addresses at runtime, highAddr - lowAddr = size. This of course
3167 lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
3168 builder.getPtrTy());
3169 highAddr = builder.CreatePointerCast(
3170 builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
3172 builder.getPtrTy());
3178 lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
3179 builder.getPtrTy());
3182 highAddr = builder.CreatePointerCast(
3183 builder.CreateGEP(mapData.BaseType[lastMemberIdx],
3184 mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
3185 builder.getPtrTy());
3189 llvm::Value *size = builder.CreateIntCast(
3190 builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
3191 builder.getInt64Ty(),
3196 ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
3215 mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3224 // pointer -> pointee that requires special handling in certain cases,
3236 // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
3246 LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3282 combinedInfo.Sizes.emplace_back(builder.getInt64(
3283 moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
3309 size = builder.CreateSelect(
3310 builder.CreateIsNull(mapData.Pointers[memberDataIdx]),
3311 builder.getInt64(0), size);
3321 bool isTargetParams, int mapDataParentIdx = -1) {
3341 // part of a larger object (in a parent <-> member mapping) and in this
3357 LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3387 mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
3389 processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
3401 llvm::IRBuilderBase &builder) {
3416 // kernel argument passing from host -> device.
3421 moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
3424 newV = builder.CreateLoad(builder.getPtrTy(), newV);
3427 newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
3434 if (mapData.Pointers[i]->getType()->isPointerTy())
3435 newV = builder.CreateLoad(type, mapData.Pointers[i]);
3440 auto curInsert = builder.saveIP();
3441 builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
3443 builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
3444 builder.restoreIP(curInsert);
3446 builder.CreateStore(newV, memTempAlloc);
3447 newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
3455 mapData.MapClause[i]->emitOpError("Unhandled capture kind");
3463 static void genMapInfos(llvm::IRBuilderBase &builder,
3479 if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
3480 createAlteredByCaptureMap(mapData, moduleTranslation, builder);
3497 processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
3507 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
3515 DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
3541 .Case([&](omp::TargetEnterDataOp enterDataOp) -> LogicalResult {
3561 .Case([&](omp::TargetExitDataOp exitDataOp) -> LogicalResult {
3581 .Case([&](omp::TargetUpdateOp updateDataOp) -> LogicalResult {
3614 builder, useDevicePtrVars, useDeviceAddrVars);
3619 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
3620 builder.restoreIP(codeGenIP);
3621 genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
3661 -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
3670 builder.restoreIP(codeGenIP);
3675 [&](llvm::Value *basePointer) -> llvm::Value * {
3678 return builder.CreateLoad(
3679 builder.getPtrTy(),
3688 if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
3698 builder.restoreIP(codeGenIP);
3701 if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
3710 if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
3716 return builder.saveIP();
3719 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3721 findAllocaInsertPoint(builder, moduleTranslation);
3724 return ompBuilder->createTargetData(
3725 ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID),
3727 return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
3728 builder.getInt64(deviceID), ifCond,
3735 builder.restoreIP(*afterIP);
3749 ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
3755 ompBuilder->createGlobalFlag(
3758 ompBuilder->createGlobalFlag(
3763 ompBuilder->createGlobalFlag(
3768 ompBuilder->createGlobalFlag(
3771 ompBuilder->createGlobalFlag(
3782 auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
3802 llvm::IRBuilderBase &builder, llvm::Function *func) {
3831 for (llvm::User *user : mapData.OriginalValue[i]->users())
3836 if (insn->getFunction() == func) {
3837 auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
3839 load->moveBefore(insn->getIterator());
3840 user->replaceUsesOfWith(mapData.OriginalValue[i], load);
3853 // semantics like read-only/no host write back kernel
3861 // required for future work, but a direct 1-to-1 copy doesn't seem
3865 // \param mapData - A container containing vectors of information
3869 // \param arg - This is the generated kernel function argument that
3873 // \param input - This is the host side value that will be passed to
3880 // \param retVal - This is the value that all uses of input inside of the
3881 // kernel will be re-written to, the goal of this function is to generate
3890 llvm::IRBuilderBase &builder,
3895 builder.restoreIP(allocaIP);
3914 llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
3916 if (allocaAS != defaultAS && arg.getType()->isPointerTy())
3917 v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));
3919 builder.CreateStore(&arg, v);
3921 builder.restoreIP(codeGenIP);
3929 retVal = builder.CreateAlignedLoad(
3930 v->getType(), v,
3931 ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
3942 return builder.saveIP();
3945 /// Follow uses of `host_eval`-defined block arguments of the given `omp.target`
3993 /// Otherwise, if its immediate parent operation (or some other higher-level
4008 return dyn_cast_if_present<OpTy>(op->getParentOp());
4010 return op->getParentOfType<OpTy>();
4060 int32_t minTeamsVal = 1, maxTeamsVal = -1;
4076 minTeamsVal = maxTeamsVal = -1;
4095 int32_t targetThreadLimitVal = -1, teamsThreadLimitVal = -1;
4100 int32_t maxThreadsVal = -1;
4133 initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
4163 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
4170 bool isTargetDevice = ompBuilder->Config.isTargetDevice();
4180 // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
4191 // TODO: It can also be false if a compile-time constant `false` IF clause is
4194 isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
4248 -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
4249 // Forward target-cpu and target-features function attributes from the
4253 llvmOutlinedFn = codeGenIP.getBlock()->getParent();
4257 if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
4259 llvmOutlinedFn->addFnAttr(attr);
4261 if (auto attr = llvmParentFn->getFnAttribute("target-features");
4263 llvmOutlinedFn->addFnAttr(attr);
4286 builder, moduleTranslation, privateBlockArgs, privateDecls,
4298 builder.restoreIP(codeGenIP);
4300 targetRegion, "omp.target", builder, moduleTranslation);
4305 builder.SetInsertPoint(*exitBlock);
4309 builder, "omp.targetop.private.cleanup",
4317 return InsertPointTy(exitBlock.get(), exitBlock.get()->end());
4329 builder);
4333 -> llvm::OpenMPIRBuilder::MapInfosTy & {
4334 builder.restoreIP(codeGenIP);
4335 genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true);
4342 -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
4353 return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
4362 // Collect host-evaluated values needed to properly launch the kernel from the
4365 initTargetRuntimeAttrs(builder, moduleTranslation, targetOp, runtimeAttrs);
4367 // Pass host-evaluated values as parameters to the kernel / host fallback,
4397 findAllocaInsertPoint(builder, moduleTranslation);
4398 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4405 moduleTranslation.getOpenMPBuilder()->createTarget(
4406 ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo,
4413 builder.restoreIP(*afterIP);
4417 if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
4418 handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
4436 op->getParentOfType<ModuleOp>().getOperation())) {
4446 llvmFunc->dropAllReferences();
4447 llvmFunc->eraseFromParent();
4455 if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
4460 auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
4472 op->getParentOfType<mlir::ModuleOp>()->getAttr(
4490 ompBuilder->registerTargetGlobalVariable(
4492 ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
4495 gVal->getType(), gVal);
4497 if (ompBuilder->Config.isTargetDevice() &&
4500 ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
4501 ompBuilder->getAddrOfDeclareTargetVar(
4503 ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
4504 generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
4518 if (op->getParentOfType<omp::TargetOp>())
4524 // some manner or result in an ICE (whether they end up in a no-op
4529 if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
4544 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
4550 .Case([&](omp::BarrierOp op) -> LogicalResult {
4555 ompBuilder->createBarrier(builder.saveIP(),
4563 ompBuilder->createTaskyield(builder.saveIP());
4578 ompBuilder->createFlush(builder.saveIP());
4582 return convertOmpParallel(op, builder, moduleTranslation);
4585 return convertOmpMasked(*op, builder, moduleTranslation);
4588 return convertOmpMaster(*op, builder, moduleTranslation);
4591 return convertOmpCritical(*op, builder, moduleTranslation);
4594 return convertOmpOrderedRegion(*op, builder, moduleTranslation);
4597 return convertOmpOrdered(*op, builder, moduleTranslation);
4600 return convertOmpWsloop(*op, builder, moduleTranslation);
4603 return convertOmpSimd(*op, builder, moduleTranslation);
4606 return convertOmpAtomicRead(*op, builder, moduleTranslation);
4609 return convertOmpAtomicWrite(*op, builder, moduleTranslation);
4612 return convertOmpAtomicUpdate(op, builder, moduleTranslation);
4615 return convertOmpAtomicCapture(op, builder, moduleTranslation);
4618 return convertOmpSections(*op, builder, moduleTranslation);
4621 return convertOmpSingle(op, builder, moduleTranslation);
4624 return convertOmpTeams(op, builder, moduleTranslation);
4627 return convertOmpTaskOp(op, builder, moduleTranslation);
4630 return convertOmpTaskgroupOp(op, builder, moduleTranslation);
4633 return convertOmpTaskwaitOp(op, builder, moduleTranslation);
4648 return convertOmpThreadprivate(*op, builder, moduleTranslation);
4652 return convertOmpTargetData(op, builder, moduleTranslation);
4655 return convertOmpTarget(*op, builder, moduleTranslation);
4659 // No-op, should be handled by relevant owning operations e.g.
4665 return inst->emitError() << "not yet implemented: " << inst->getName();
4670 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
4672 return convertHostOrTargetOperation(op, builder, moduleTranslation);
4676 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
4679 return convertOmpTarget(*op, builder, moduleTranslation);
4681 return convertOmpTargetData(op, builder, moduleTranslation);
4683 op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
4685 if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
4690 if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
4708 /// Translates the given operation to LLVM IR using the provided IR builder
4711 convertOperation(Operation *op, llvm::IRBuilderBase &builder,
4714 /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
4734 moduleTranslation.getOpenMPBuilder()->Config;
4744 moduleTranslation.getOpenMPBuilder()->Config;
4755 ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
4771 ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
4791 moduleTranslation.getOpenMPBuilder()->Config;
4808 moduleTranslation.getOpenMPBuilder()->Config;
4832 Operation *op, llvm::IRBuilderBase &builder,
4836 if (ompBuilder->Config.isTargetDevice()) {
4838 return convertTargetDeviceOp(op, builder, moduleTranslation);
4840 return convertTargetOpsInNest(op, builder, moduleTranslation);
4843 return convertHostOrTargetOperation(op, builder, moduleTranslation);
4849 dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();