Lines Matching defs:forOp

44 getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
49 getTripCountMapAndOperands(forOp, &tripCountMap, &tripCountOperands);
56 OpBuilder b(forOp);
57 auto lbMap = forOp.getLowerBoundMap();
58 auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap,
59 forOp.getLowerBoundOperands());
68 int64_t step = forOp.getStepAsInt();
75 b.create<AffineApplyOp>(forOp.getLoc(), bumpMap, tripCountOperands);
102 static void replaceIterArgsAndYieldResults(AffineForOp forOp) {
104 auto iterOperands = forOp.getInits();
105 auto iterArgs = forOp.getRegionIterArgs();
110 auto outerResults = forOp.getResults();
111 auto innerResults = forOp.getBody()->getTerminator()->getOperands();
116 /// Promotes the loop body of a forOp to its containing block if the forOp
118 LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) {
119 std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
124 if (forOp.getLowerBoundMap().getNumResults() != 1)
128 auto iv = forOp.getInductionVar();
129 auto *parentBlock = forOp->getBlock();
131 if (forOp.hasConstantLowerBound()) {
132 auto func = forOp->getParentOfType<FunctionOpInterface>();
133 OpBuilder builder(forOp->getContext());
137 builder.setInsertionPoint(forOp);
139 forOp.getLoc(), forOp.getConstantLowerBound());
142 auto lbOperands = forOp.getLowerBoundOperands();
143 auto lbMap = forOp.getLowerBoundMap();
144 OpBuilder builder(forOp);
150 builder.create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);
156 replaceIterArgsAndYieldResults(forOp);
160 forOp.getBody()->back().erase();
161 parentBlock->getOperations().splice(Block::iterator(forOp),
162 forOp.getBody()->getOperations());
163 forOp.erase();
229 LogicalResult mlir::affine::affineForOpBodySkew(AffineForOp forOp,
232 assert(forOp.getBody()->getOperations().size() == shifts.size() &&
234 if (forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
241 auto mayBeConstTripCount = getConstantTripCount(forOp);
243 LLVM_DEBUG(forOp.emitRemark("non-constant trip count loop not handled"));
248 assert(isOpwiseShiftValid(forOp, shifts) &&
251 int64_t step = forOp.getStepAsInt();
259 forOp.emitWarning("not shifting because shifts are unrealistically large");
268 for (auto &op : forOp.getBody()->without_terminator()) {
285 auto origLbMap = forOp.getLowerBoundMap();
287 OpBuilder b(forOp);
303 opGroupQueue, /*offset=*/0, forOp, b);
310 opGroupQueue, /*offset=*/0, forOp, b);
342 opGroupQueue, /*offset=*/i, forOp, b);
349 forOp.erase();
877 for (AffineForOp forOp : f.getOps<AffineForOp>()) {
879 getPerfectlyNestedLoops(band, forOp);
885 LogicalResult mlir::affine::loopUnrollFull(AffineForOp forOp) {
886 std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
892 return promoteIfSingleIteration(forOp);
893 return loopUnrollByFactor(forOp, tripCount);
900 LogicalResult mlir::affine::loopUnrollUpToFactor(AffineForOp forOp,
902 std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
905 return loopUnrollByFactor(forOp, *mayBeConstantTripCount);
906 return loopUnrollByFactor(forOp, unrollFactor);
919 // 'forOp'.
930 // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
946 // Clone the original body of 'forOp'.
974 static LogicalResult generateCleanupLoopForUnroll(AffineForOp forOp,
976 // Insert the cleanup loop right after 'forOp'.
977 OpBuilder builder(forOp->getBlock(), std::next(Block::iterator(forOp)));
978 auto cleanupForOp = cast<AffineForOp>(builder.clone(*forOp));
980 // Update uses of `forOp` results. `cleanupForOp` should use `forOp` result
981 // and produce results for the original users of `forOp` results.
982 auto results = forOp.getResults();
993 getCleanupLoopLowerBound(forOp, unrollFactor, cleanupMap, cleanupOperands);
1003 forOp.setUpperBound(cleanupOperands, cleanupMap);
1010 AffineForOp forOp, uint64_t unrollFactor,
1015 std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
1018 failed(promoteIfSingleIteration(forOp)))
1024 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
1031 return loopUnrollFull(forOp);
1038 if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
1043 if (forOp.getLowerBoundMap().getNumResults() != 1 ||
1044 forOp.getUpperBoundMap().getNumResults() != 1)
1048 return loopUnrollFull(forOp);
1049 if (failed(generateCleanupLoopForUnroll(forOp, unrollFactor)))
1054 ValueRange iterArgs(forOp.getRegionIterArgs());
1055 auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
1058 int64_t step = forOp.getStepAsInt();
1059 forOp.setStep(step * unrollFactor);
1061 forOp.getBody(), forOp.getInductionVar(), unrollFactor,
1066 return b.create<AffineApplyOp>(forOp.getLoc(), bumpMap, iv);
1072 (void)promoteIfSingleIteration(forOp);
1076 LogicalResult mlir::affine::loopUnrollJamUpToFactor(AffineForOp forOp,
1078 std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
1081 return loopUnrollJamByFactor(forOp, *mayBeConstantTripCount);
1082 return loopUnrollJamByFactor(forOp, unrollJamFactor);
1085 /// Check if all control operands of all loops are defined outside of `forOp`
1087 static bool areInnerBoundsInvariant(AffineForOp forOp) {
1088 auto walkResult = forOp.walk([&](AffineForOp aForOp) {
1090 if (!forOp.isDefinedOutsideOfLoop(controlOperand))
1099 LogicalResult mlir::affine::loopUnrollJamByFactor(AffineForOp forOp,
1103 std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
1106 failed(promoteIfSingleIteration(forOp)))
1112 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
1121 // If any control operand of any inner loop of `forOp` is defined within
1122 // `forOp`, no unroll jam.
1123 if (!areInnerBoundsInvariant(forOp))
1128 jbg.walk(forOp);
1133 forOp.walk([&](AffineForOp aForOp) {
1140 if (forOp.getNumIterOperands() > 0)
1141 getSupportedReductions(forOp, reductions);
1145 if (getLargestDivisorOfTripCount(forOp) % unrollJamFactor != 0) {
1150 if (forOp.getLowerBoundMap().getNumResults() != 1 ||
1151 forOp.getUpperBoundMap().getNumResults() != 1)
1153 if (failed(generateCleanupLoopForUnroll(forOp, unrollJamFactor)))
1166 IRRewriter rewriter(forOp.getContext());
1181 bool forOpReplaced = oldForOp == forOp;
1189 // `forOp` has been replaced with a new loop.
1191 forOp = newForOp;
1213 int64_t step = forOp.getStepAsInt();
1214 forOp.setStep(step * unrollJamFactor);
1216 auto forOpIV = forOp.getInductionVar();
1231 builder.create<AffineApplyOp>(forOp.getLoc(), bumpMap, forOpIV);
1260 if (forOp.getNumResults() > 0) {
1265 rewriter.setInsertionPointAfter(forOp);
1266 auto loc = forOp.getLoc();
1267 unsigned oldNumResults = forOp.getNumResults() / unrollJamFactor;
1270 Value lhs = forOp.getResult(pos);
1274 rhs = forOp.getResult(i * oldNumResults + pos);
1284 forOp.getResult(pos).replaceAllUsesExcept(lhs, newOps);
1289 (void)promoteIfSingleIteration(forOp);
1458 AffineForOp mlir::affine::sinkSequentialLoops(AffineForOp forOp) {
1460 getPerfectlyNestedLoops(loops, forOp);
1462 return forOp;
1499 return forOp;
1525 // Stripmines `forOp` by `factor` and sinks it under each of the `targets`.
1530 // responsibility to specify `targets` that are dominated by `forOp`.
1534 stripmineSink(AffineForOp forOp, uint64_t factor,
1536 auto originalStep = forOp.getStepAsInt();
1538 forOp.setStep(scaledStep);
1540 OpBuilder b(forOp->getBlock(), std::next(Block::iterator(forOp)));
1543 auto lbMap = forOp.getLowerBoundMap();
1544 SmallVector<Value, 4> lbOperands(forOp.getLowerBoundOperands());
1545 augmentMapAndBounds(b, forOp.getInductionVar(), &lbMap, &lbOperands);
1548 auto ubMap = forOp.getUpperBoundMap();
1549 SmallVector<Value, 4> ubOperands(forOp.getUpperBoundOperands());
1550 augmentMapAndBounds(b, forOp.getInductionVar(), &ubMap, &ubOperands,
1553 auto iv = forOp.getInductionVar();
1574 // Stripmines a `forOp` by `factor` and sinks it under a single `target`.
1577 static AffineForOp stripmineSink(AffineForOp forOp, SizeType factor,
1580 // forOp and that targets are not nested under each other when DominanceInfo
1583 auto res = stripmineSink(forOp, factor, ArrayRef<AffineForOp>(target));
1584 assert(res.size() == 1 && "Expected 1 inner forOp");
1726 void mlir::affine::mapLoopToProcessorIds(scf::ForOp forOp,
1733 OpBuilder b(forOp);
1734 Location loc(forOp.getLoc());
1736 bindSymbols(forOp.getContext(), lhs, rhs);
1749 loc, mulMap, ValueRange{linearIndex, forOp.getStep()});
1751 loc, addMap, ValueRange{mulApplyOp, forOp.getLowerBound()});
1752 forOp.setLowerBound(lb);
1754 Value step = forOp.getStep();
1757 forOp.setStep(step);
1868 auto forOp = createCanonicalizedAffineForOp(b, loc, lbOperands, lbMaps[d],
1871 copyNestRoot = forOp;
1873 b = OpBuilder::atBlockTerminator(forOp.getBody());
1884 fastBufMapOperands.push_back(forOp.getInductionVar());
1888 memIndices.push_back(forOp.getInductionVar());
2447 AffineForOp forOp;
2448 if (llvm::DebugFlag && (forOp = dyn_cast<AffineForOp>(&*begin))) {
2449 LLVM_DEBUG(forOp.emitRemark()
2466 AffineForOp forOp, const AffineCopyOptions &copyOptions,
2468 return affineDataCopyGenerate(forOp.getBody()->begin(),
2469 std::prev(forOp.getBody()->end()), copyOptions,
2509 if (auto forOp = dyn_cast<AffineForOp>(op)) {
2510 depthToLoops[currLoopDepth].push_back(forOp);
2511 gatherLoopsInBlock(forOp.getBody(), currLoopDepth + 1, depthToLoops);