//===- FusePadOpWithLinalgProducer.cpp ---- Fuse pad with linalg producer -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements patterns that fuse a linalg.generic -> tensor.pad op
// chain into a tensor.extract_slice -> linalg.generic -> tensor.insert_slice
// op chain.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;

namespace {

/// A sequence of operations
///
/// ```mlir
/// %0 = linalg. ...
/// %1 = tensor.pad %0 ...
/// ```
///
/// can be replaced with
///
/// ```mlir
/// %0 = linalg.fill
/// %1 = tensor.extract_slice %0 ...
/// %2 = linalg. .... outs(..., %1, ....) ....
/// %3 = tensor.insert_slice %2 into %0 ...
/// ```
///
/// if the `linalg.generic` has all parallel iterator types.
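///
/// As a concrete illustration (shapes, pad amounts, and value names here are
/// hypothetical, not taken from any particular test), a pattern application
/// would turn
///
/// ```mlir
/// %gen = linalg.generic ... -> tensor<4x5xf32>
/// %pad = tensor.pad %gen low[1, 2] high[2, 1] {
/// ^bb0(%i: index, %j: index):
///   tensor.yield %cst : f32
/// } : tensor<4x5xf32> to tensor<7x8xf32>
/// ```
///
/// into roughly
///
/// ```mlir
/// %empty = tensor.empty() : tensor<7x8xf32>
/// %fill = linalg.fill ins(%cst : f32) outs(%empty : tensor<7x8xf32>)
///     -> tensor<7x8xf32>
/// %slice = tensor.extract_slice %fill[1, 2] [4, 5] [1, 1]
///     : tensor<7x8xf32> to tensor<4x5xf32>
/// %gen2 = linalg.generic ... outs(%slice : tensor<4x5xf32>) -> tensor<4x5xf32>
/// %res = tensor.insert_slice %gen2 into %fill[1, 2] [4, 5] [1, 1]
///     : tensor<4x5xf32> into tensor<7x8xf32>
/// ```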
struct FusePadOp : OpRewritePattern<tensor::PadOp> {
  using OpRewritePattern<tensor::PadOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(tensor::PadOp padOp,
                                PatternRewriter &rewriter) const override {
    // Only works on pad ops that set the padding value to a constant.
    Value padValue = padOp.getConstantPaddingValue();
    if (!padValue)
      return rewriter.notifyMatchFailure(padOp, "non-constant padding value");

    // This pattern could work for any Linalg op. For now restrict it to
    // generic ops.
    Value source = padOp.getSource();
    auto linalgOp = source.getDefiningOp<linalg::GenericOp>();
    if (!linalgOp) {
      return rewriter.notifyMatchFailure(
          padOp, "expected source to be linalg.generic op");
    }
    // All iterator types need to be parallel.
    if (linalgOp.getNumLoops() != linalgOp.getNumParallelLoops()) {
      return rewriter.notifyMatchFailure(
          padOp, "only supported for ops with all parallel iterator types");
    }
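    // Reify the shape of the pad result. tensor.pad has a single result, so a
    // successful reification yields exactly one vector of sizes, which is used
    // below to size the destination tensor.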
    ReifiedRankedShapedTypeDims resultShape;
    if (failed(reifyResultShapes(rewriter, padOp, resultShape)) ||
        resultShape.size() != 1) {
      return rewriter.notifyMatchFailure(
          padOp, "failed to get shape of pad op result");
    }

    Location loc = padOp.getLoc();

    // Create a tensor of the same size as the result of the pad op.
    RankedTensorType padResultType = padOp.getResultType();
    auto resultSizes = resultShape[0];
    auto emptyTensor = rewriter.create<tensor::EmptyOp>(
        loc, resultSizes, padResultType.getElementType());

    // Fill the tensor with the pad value.
    // TODO: There is an option to fill only the boundaries. For now just
    // filling the whole tensor.
    auto fillTensor =
        rewriter.create<linalg::FillOp>(loc, padValue, emptyTensor.getResult());

    // Construct a slice of the fill result that is to be replaced with the
    // result of the generic op. The low pad values are the offsets, the size
    // of the source is the size of the slice.
    // TODO: This insert/extract could be potentially made a utility method.
    unsigned resultNumber = cast<OpResult>(source).getResultNumber();
    SmallVector<OpFoldResult> offsets = padOp.getMixedLowPad();
    SmallVector<OpFoldResult> sizes;
    sizes.reserve(offsets.size());
    for (const auto &shape :
         llvm::enumerate(cast<RankedTensorType>(source.getType()).getShape())) {
      if (ShapedType::isDynamic(shape.value())) {
        sizes.push_back(
            rewriter.create<tensor::DimOp>(loc, source, shape.index())
                .getResult());
      } else {
        sizes.push_back(rewriter.getIndexAttr(shape.value()));
      }
    }
    SmallVector<OpFoldResult> strides(offsets.size(), rewriter.getIndexAttr(1));
    auto slice = rewriter.create<tensor::ExtractSliceOp>(
        loc, fillTensor.getResult(0), offsets, sizes, strides);

    // Clone the generic op.
    auto clonedOp =
        cast<linalg::GenericOp>(rewriter.clone(*linalgOp.getOperation()));
    clonedOp.setDpsInitOperand(resultNumber, slice.getResult());
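    // The clone now uses the slice of the filled tensor as its destination
    // (init) operand; once its result is re-inserted below, everything outside
    // the slice keeps the constant pad value.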

    // Insert it back into the result of the fill.
    rewriter.replaceOpWithNewOp<tensor::InsertSliceOp>(
        padOp, clonedOp.getResult(resultNumber), fillTensor.getResult(0),
        offsets, sizes, strides);
    return success();
  }
};
} // namespace

void mlir::linalg::populateFuseTensorPadWithProducerLinalgOpPatterns(
    RewritePatternSet &patterns) {
  patterns.add<FusePadOp>(patterns.getContext());
}
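
// A minimal usage sketch (not part of this file's API): a pass would typically
// collect this pattern and hand it to the greedy rewrite driver declared in
// GreedyPatternRewriteDriver.h. `funcOp` is a hypothetical anchor op, and the
// driver entry point is `applyPatternsGreedily` in recent MLIR
// (`applyPatternsAndFoldGreedily` in older releases):
//
//   RewritePatternSet patterns(funcOp.getContext());
//   linalg::populateFuseTensorPadWithProducerLinalgOpPatterns(patterns);
//   if (failed(applyPatternsGreedily(funcOp, std::move(patterns))))
//     signalPassFailure();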