xref: /llvm-project/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp (revision e2310704d890ad252aeb1ca28b4b84d29514b1d1)
1 //===- Promotion.cpp - Implementation of linalg Promotion -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the linalg dialect Promotion pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PassDetail.h"
14 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
15 #include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h"
16 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
17 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
18 #include "mlir/Dialect/Linalg/Passes.h"
19 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
20 #include "mlir/Dialect/Linalg/Utils/Utils.h"
21 #include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
22 #include "mlir/Dialect/SCF/SCF.h"
23 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
24 #include "mlir/IR/AffineExpr.h"
25 #include "mlir/IR/AffineExprVisitor.h"
26 #include "mlir/IR/AffineMap.h"
27 #include "mlir/Support/LLVM.h"
28 #include "mlir/Transforms/FoldUtils.h"
29 #include "llvm/ADT/MapVector.h"
30 #include "llvm/Support/CommandLine.h"
31 
32 using namespace mlir;
33 using namespace mlir::edsc;
34 using namespace mlir::edsc::intrinsics;
35 using namespace mlir::linalg;
36 using namespace mlir::scf;
37 
38 using llvm::MapVector;
39 
40 using folded_affine_min = FoldedValueBuilder<AffineMinOp>;
41 using folded_linalg_range = FoldedValueBuilder<linalg::RangeOp>;
42 using folded_memref_dim = FoldedValueBuilder<memref::DimOp>;
43 using folded_memref_subview = FoldedValueBuilder<memref::SubViewOp>;
44 using folded_memref_view = FoldedValueBuilder<memref::ViewOp>;
45 
46 #define DEBUG_TYPE "linalg-promotion"
47 
48 /// Alloc a new buffer of `size`. If `dynamicBuffers` is true allocate exactly
49 /// the size needed, otherwise try to allocate a static bounding box.
50 static Value allocBuffer(const LinalgPromotionOptions &options,
51                          Type elementType, Value size, bool dynamicBuffers,
52                          OperationFolder *folder,
53                          Optional<unsigned> alignment = None) {
54   auto *ctx = size.getContext();
55   auto width = llvm::divideCeil(elementType.getIntOrFloatBitWidth(), 8);
56   IntegerAttr alignment_attr;
57   if (alignment.hasValue())
58     alignment_attr =
59         IntegerAttr::get(IntegerType::get(ctx, 64), alignment.getValue());
60   if (!dynamicBuffers)
61     if (auto cst = size.getDefiningOp<ConstantIndexOp>())
62       return options.useAlloca
63                  ? memref_alloca(MemRefType::get(width * cst.getValue(),
64                                                  IntegerType::get(ctx, 8)),
65                                  ValueRange{}, alignment_attr)
66                        .value
67                  : memref_alloc(MemRefType::get(width * cst.getValue(),
68                                                 IntegerType::get(ctx, 8)),
69                                 ValueRange{}, alignment_attr)
70                        .value;
71   Value mul =
72       folded_std_muli(folder, folded_std_constant_index(folder, width), size);
73   return options.useAlloca
74              ? memref_alloca(MemRefType::get(-1, IntegerType::get(ctx, 8)), mul,
75                              alignment_attr)
76                    .value
77              : memref_alloc(MemRefType::get(-1, IntegerType::get(ctx, 8)), mul,
78                             alignment_attr)
79                    .value;
80 }
81 
82 /// Default allocation callback function. This allocates a promoted buffer when
83 /// no call back to do so is provided. The default is to allocate a
84 /// memref<..xi8> and return a view to get a memref type of shape
85 /// boundingSubViewSize.
86 static Optional<Value>
87 defaultAllocBufferCallBack(const LinalgPromotionOptions &options,
88                            OpBuilder &builder, memref::SubViewOp subView,
89                            ArrayRef<Value> boundingSubViewSize,
90                            bool dynamicBuffers, Optional<unsigned> alignment,
91                            OperationFolder *folder) {
92   ShapedType viewType = subView.getType();
93   int64_t rank = viewType.getRank();
94   (void)rank;
95   assert(rank > 0 && boundingSubViewSize.size() == static_cast<size_t>(rank));
96   auto zero = folded_std_constant_index(folder, 0);
97   auto one = folded_std_constant_index(folder, 1);
98 
99   Value allocSize = one;
100   for (auto size : llvm::enumerate(boundingSubViewSize))
101     allocSize = folded_std_muli(folder, allocSize, size.value());
102   Value buffer = allocBuffer(options, viewType.getElementType(), allocSize,
103                              dynamicBuffers, folder, alignment);
104   SmallVector<int64_t, 4> dynSizes(boundingSubViewSize.size(),
105                                    ShapedType::kDynamicSize);
106   Value view = folded_memref_view(
107       folder, MemRefType::get(dynSizes, viewType.getElementType()), buffer,
108       zero, boundingSubViewSize);
109   return view;
110 }
111 
112 /// Default implementation of deallocation of the buffer use for promotion. It
113 /// expects to get the same value that the default allocation method returned,
114 /// i.e. result of a ViewOp.
115 static LogicalResult
116 defaultDeallocBufferCallBack(const LinalgPromotionOptions &options,
117                              OpBuilder &b, Value fullLocalView) {
118   auto viewOp = fullLocalView.getDefiningOp<memref::ViewOp>();
119   assert(viewOp && "expected full local view to be a ViewOp");
120   if (!options.useAlloca)
121     memref_dealloc(viewOp.source());
122   return success();
123 }
124 
125 namespace {
126 
127 /// Helper struct that captures the information required to apply the
128 /// transformation on each op. This bridges the abstraction gap with the
129 /// user-facing API which exposes positional arguments to control which operands
130 /// are promoted.
131 struct LinalgOpInstancePromotionOptions {
132   LinalgOpInstancePromotionOptions(LinalgOp op,
133                                    const LinalgPromotionOptions &options);
134   /// SubViews to promote.
135   MapVector<unsigned, Value> subViews;
136   /// True if the full view should be used for the promoted buffer.
137   DenseMap<Value, bool> useFullTileBuffers;
138 
139   /// Callback functions for allocation and deallocation of promoted buffers, as
140   /// well as to copy the data into and out of these buffers.
141   AllocBufferCallbackFn allocationFn;
142   DeallocBufferCallbackFn deallocationFn;
143   CopyCallbackFn copyInFn;
144   CopyCallbackFn copyOutFn;
145 
146   /// Allow the use of dynamically-sized buffers.
147   bool dynamicBuffers;
148   /// Alignment of promoted buffer.
149   Optional<unsigned> alignment;
150 };
151 } // namespace
152 
153 LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions(
154     LinalgOp linalgOp, const LinalgPromotionOptions &options)
155     : subViews(), dynamicBuffers(options.dynamicBuffers),
156       alignment(options.alignment) {
157   assert(linalgOp.hasBufferSemantics() && "revisit usage of shaped operand");
158   unsigned nBuffers = linalgOp.getNumShapedOperands();
159   auto vUseFullTileBuffers =
160       options.useFullTileBuffers.getValueOr(llvm::SmallBitVector());
161   vUseFullTileBuffers.resize(nBuffers, options.useFullTileBuffersDefault);
162 
163   for (unsigned idx = 0; idx != nBuffers; ++idx) {
164     if (options.operandsToPromote && !options.operandsToPromote->count(idx))
165       continue;
166     auto *op = linalgOp.getShapedOperand(idx).getDefiningOp();
167     if (auto sv = dyn_cast_or_null<memref::SubViewOp>(op)) {
168       subViews[idx] = sv;
169       useFullTileBuffers[sv] = vUseFullTileBuffers[idx];
170     }
171   }
172 
173   allocationFn = (options.allocationFn
174                       ? *(options.allocationFn)
175                       : [&](OpBuilder &builder, memref::SubViewOp subViewOp,
176                             ArrayRef<Value> boundingSubViewSize,
177                             OperationFolder *folder) -> Optional<Value> {
178     return defaultAllocBufferCallBack(options, builder, subViewOp,
179                                       boundingSubViewSize, dynamicBuffers,
180                                       alignment, folder);
181   });
182   deallocationFn =
183       (options.deallocationFn
184            ? *(options.deallocationFn)
185            : [&](OpBuilder &b, Value buffer) {
186                return defaultDeallocBufferCallBack(options, b, buffer);
187              });
188   auto defaultCopyCallBack = [&](OpBuilder &builder, Value src,
189                                  Value dst) -> LogicalResult {
190     linalg_copy(src, dst);
191     return success();
192   };
193   copyInFn = (options.copyInFn ? *(options.copyInFn) : defaultCopyCallBack);
194   copyOutFn = (options.copyOutFn ? *(options.copyOutFn) : defaultCopyCallBack);
195 }
196 
197 // Performs promotion of a `subView` into a local buffer of the size of the
198 // *ranges* of the `subView`. This produces a buffer whose size may be bigger
199 // than the actual size of the `subView` at the boundaries.
200 // This is related to the full/partial tile problem.
201 // Returns a PromotionInfo containing a `buffer`, `fullLocalView` and
202 // `partialLocalView` such that:
203 //   * `buffer` is always the size of the full tile.
204 //   * `fullLocalView` is a dense contiguous view into that buffer.
205 //   * `partialLocalView` is a dense non-contiguous slice of `fullLocalView`
206 //     that corresponds to the size of `subView` and accounting for boundary
207 //     effects.
208 // The point of the full tile buffer is that constant static tile sizes are
209 // folded and result in a buffer type with statically known size and alignment
210 // properties.
211 // To account for general boundary effects, padding must be performed on the
212 // boundary tiles. For now this is done with an unconditional `fill` op followed
213 // by a partial `copy` op.
214 Optional<PromotionInfo> mlir::linalg::promoteSubviewAsNewBuffer(
215     OpBuilder &b, Location loc, memref::SubViewOp subView,
216     AllocBufferCallbackFn allocationFn, OperationFolder *folder) {
217   ScopedContext scopedContext(b, loc);
218   auto viewType = subView.getType();
219   auto rank = viewType.getRank();
220   SmallVector<Value, 4> fullSizes;
221   SmallVector<OpFoldResult> partialSizes;
222   fullSizes.reserve(rank);
223   partialSizes.reserve(rank);
224   for (auto en : llvm::enumerate(subView.getOrCreateRanges(b, loc))) {
225     auto rangeValue = en.value();
226     // Try to extract a tight constant.
227     LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n");
228     IntegerAttr sizeAttr = getSmallestBoundingIndex(rangeValue.size);
229     Value size =
230         (!sizeAttr) ? rangeValue.size : b.create<ConstantOp>(loc, sizeAttr);
231     LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n");
232     fullSizes.push_back(size);
233     partialSizes.push_back(
234         folded_memref_dim(folder, subView, en.index()).value);
235   }
236   SmallVector<int64_t, 4> dynSizes(fullSizes.size(), -1);
237   // If a callback is not specified, then use the default implementation for
238   // allocating the promoted buffer.
239   Optional<Value> fullLocalView = allocationFn(b, subView, fullSizes, folder);
240   if (!fullLocalView)
241     return {};
242   SmallVector<OpFoldResult, 4> zeros(fullSizes.size(), b.getIndexAttr(0));
243   SmallVector<OpFoldResult, 4> ones(fullSizes.size(), b.getIndexAttr(1));
244   auto partialLocalView =
245       folded_memref_subview(folder, *fullLocalView, zeros, partialSizes, ones);
246   return PromotionInfo{*fullLocalView, partialLocalView};
247 }
248 
249 static Optional<MapVector<unsigned, PromotionInfo>>
250 promoteSubViews(OpBuilder &b, Location loc,
251                 LinalgOpInstancePromotionOptions options,
252                 OperationFolder *folder) {
253   if (options.subViews.empty())
254     return {};
255 
256   ScopedContext scope(b, loc);
257   MapVector<unsigned, PromotionInfo> promotionInfoMap;
258 
259   for (auto v : options.subViews) {
260     memref::SubViewOp subView =
261         cast<memref::SubViewOp>(v.second.getDefiningOp());
262     Optional<PromotionInfo> promotionInfo = promoteSubviewAsNewBuffer(
263         b, loc, subView, options.allocationFn, folder);
264     if (!promotionInfo)
265       return {};
266     promotionInfoMap[v.first] = *promotionInfo;
267 
268     // Only fill the buffer if the full local view is used
269     if (!options.useFullTileBuffers[v.second])
270       continue;
271     Value fillVal;
272     if (auto t = subView.getType().getElementType().dyn_cast<FloatType>())
273       fillVal = folded_std_constant(folder, FloatAttr::get(t, 0.0));
274     else if (auto t =
275                  subView.getType().getElementType().dyn_cast<IntegerType>())
276       fillVal = folded_std_constant_int(folder, 0, t);
277     linalg_fill(promotionInfo->fullLocalView, fillVal);
278   }
279 
280   // Copy data into the promoted buffers. Use callback if provided.
281   for (auto v : options.subViews) {
282     auto info = promotionInfoMap.find(v.first);
283     if (info == promotionInfoMap.end())
284       continue;
285     if (failed(options.copyInFn(
286             b, cast<memref::SubViewOp>(v.second.getDefiningOp()),
287             info->second.partialLocalView)))
288       return {};
289   }
290   return promotionInfoMap;
291 }
292 
293 static Optional<LinalgOp>
294 promoteSubViews(OpBuilder &b, LinalgOp op,
295                 LinalgOpInstancePromotionOptions options,
296                 OperationFolder *folder) {
297   assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics");
298 
299   if (auto convOp = dyn_cast<linalg::ConvOp>(op.getOperation())) {
300     // TODO: add a level of indirection to linalg.generic.
301     if (convOp.padding())
302       return {};
303   }
304 
305   // 1. Promote the specified views and use them in the new op.
306   auto loc = op.getLoc();
307   auto promotedBuffersAndViews = promoteSubViews(b, loc, options, folder);
308   if (!promotedBuffersAndViews ||
309       promotedBuffersAndViews->size() != options.subViews.size())
310     return {};
311 
312   // 2. Append all other operands as they appear, this enforces that such
313   // operands are not views. This is to support cases such as FillOp taking
314   // extra scalars etc.  Keep a reference to output buffers;
315   SmallVector<Value, 8> opViews;
316   opViews.reserve(op.getNumShapedOperands());
317   SmallVector<std::pair<Value, Value>, 8> writebackViews;
318   writebackViews.reserve(promotedBuffersAndViews->size());
319   for (auto view : llvm::enumerate(op.getShapedOperands())) {
320     if (options.subViews.count(view.index()) != 0) {
321       if (options.useFullTileBuffers[view.value()])
322         opViews.push_back(
323             (*promotedBuffersAndViews)[view.index()].fullLocalView);
324       else
325         opViews.push_back(
326             (*promotedBuffersAndViews)[view.index()].partialLocalView);
327       if (view.index() >= op.getNumInputs())
328         writebackViews.emplace_back(std::make_pair(
329             view.value(),
330             (*promotedBuffersAndViews)[view.index()].partialLocalView));
331     } else {
332       opViews.push_back(view.value());
333     }
334   }
335   op->setOperands(0, opViews.size(), opViews);
336 
337   OpBuilder::InsertionGuard guard(b);
338   b.setInsertionPointAfter(op);
339   ScopedContext scope(b, loc);
340   // 3. Emit write-back for the promoted output views: copy the partial view.
341   for (auto viewAndPartialLocalView : writebackViews) {
342     if (failed(options.copyOutFn(b, viewAndPartialLocalView.second,
343                                  viewAndPartialLocalView.first)))
344       return {};
345   }
346 
347   // 4. Dealloc all local buffers.
348   for (const auto &pi : *promotedBuffersAndViews)
349     (void)options.deallocationFn(b, pi.second.fullLocalView);
350   return op;
351 }
352 
353 LogicalResult
354 mlir::linalg::promoteSubviewsPrecondition(Operation *op,
355                                           LinalgPromotionOptions options) {
356   LinalgOp linOp = dyn_cast<LinalgOp>(op);
357   // Transformation applies to buffers only.
358   if (!linOp || !linOp.hasBufferSemantics())
359     return failure();
360   // Check that at least one of the requested operands is indeed a subview.
361   for (auto en : llvm::enumerate(linOp.getShapedOperands())) {
362     auto sv = isa_and_nonnull<memref::SubViewOp>(en.value().getDefiningOp());
363     if (sv) {
364       if (!options.operandsToPromote.hasValue() ||
365           options.operandsToPromote->count(en.index()))
366         return success();
367     }
368   }
369   // TODO: Check all subviews requested are bound by a static constant.
370   // TODO: Check that the total footprint fits within a given size.
371   return failure();
372 }
373 
374 Optional<LinalgOp> mlir::linalg::promoteSubViews(OpBuilder &b,
375                                                  LinalgOp linalgOp,
376                                                  LinalgPromotionOptions options,
377                                                  OperationFolder *folder) {
378   LinalgOpInstancePromotionOptions linalgOptions(linalgOp, options);
379   return ::promoteSubViews(
380       b, linalgOp, LinalgOpInstancePromotionOptions(linalgOp, options), folder);
381 }
382 
383 namespace {
384 struct LinalgPromotionPass : public LinalgPromotionBase<LinalgPromotionPass> {
385   LinalgPromotionPass() = default;
386   LinalgPromotionPass(bool dynamicBuffers, bool useAlloca) {
387     this->dynamicBuffers = dynamicBuffers;
388     this->useAlloca = useAlloca;
389   }
390 
391   void runOnFunction() override {
392     OperationFolder folder(&getContext());
393     getFunction().walk([this, &folder](LinalgOp op) {
394       auto options = LinalgPromotionOptions()
395                          .setDynamicBuffers(dynamicBuffers)
396                          .setUseAlloca(useAlloca);
397       if (failed(promoteSubviewsPrecondition(op, options)))
398         return;
399       LLVM_DEBUG(llvm::dbgs() << "Promote: " << *(op.getOperation()) << "\n");
400       OpBuilder b(op);
401       promoteSubViews(b, op, options, &folder);
402     });
403   }
404 };
405 } // namespace
406 
407 // TODO: support more transformation options in the pass.
408 std::unique_ptr<OperationPass<FuncOp>>
409 mlir::createLinalgPromotionPass(bool dynamicBuffers, bool useAlloca) {
410   return std::make_unique<LinalgPromotionPass>(dynamicBuffers, useAlloca);
411 }
412 std::unique_ptr<OperationPass<FuncOp>> mlir::createLinalgPromotionPass() {
413   return std::make_unique<LinalgPromotionPass>();
414 }
415