1 //===- Promotion.cpp - Implementation of linalg Promotion -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the linalg dialect Promotion pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PassDetail.h" 14 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" 15 #include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h" 16 #include "mlir/Dialect/Linalg/IR/LinalgOps.h" 17 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" 18 #include "mlir/Dialect/Linalg/Passes.h" 19 #include "mlir/Dialect/Linalg/Transforms/Transforms.h" 20 #include "mlir/Dialect/Linalg/Utils/Utils.h" 21 #include "mlir/Dialect/MemRef/EDSC/Intrinsics.h" 22 #include "mlir/Dialect/SCF/SCF.h" 23 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" 24 #include "mlir/IR/AffineExpr.h" 25 #include "mlir/IR/AffineExprVisitor.h" 26 #include "mlir/IR/AffineMap.h" 27 #include "mlir/Support/LLVM.h" 28 #include "mlir/Transforms/FoldUtils.h" 29 #include "llvm/ADT/MapVector.h" 30 #include "llvm/Support/CommandLine.h" 31 32 using namespace mlir; 33 using namespace mlir::edsc; 34 using namespace mlir::edsc::intrinsics; 35 using namespace mlir::linalg; 36 using namespace mlir::scf; 37 38 using llvm::MapVector; 39 40 using folded_affine_min = FoldedValueBuilder<AffineMinOp>; 41 using folded_linalg_range = FoldedValueBuilder<linalg::RangeOp>; 42 using folded_memref_dim = FoldedValueBuilder<memref::DimOp>; 43 using folded_memref_subview = FoldedValueBuilder<memref::SubViewOp>; 44 using folded_memref_view = FoldedValueBuilder<memref::ViewOp>; 45 46 #define DEBUG_TYPE "linalg-promotion" 47 48 /// Alloc a new buffer of `size`. If `dynamicBuffers` is true allocate exactly 49 /// the size needed, otherwise try to allocate a static bounding box. 50 static Value allocBuffer(const LinalgPromotionOptions &options, 51 Type elementType, Value size, bool dynamicBuffers, 52 OperationFolder *folder, 53 Optional<unsigned> alignment = None) { 54 auto *ctx = size.getContext(); 55 auto width = llvm::divideCeil(elementType.getIntOrFloatBitWidth(), 8); 56 IntegerAttr alignment_attr; 57 if (alignment.hasValue()) 58 alignment_attr = 59 IntegerAttr::get(IntegerType::get(ctx, 64), alignment.getValue()); 60 if (!dynamicBuffers) 61 if (auto cst = size.getDefiningOp<ConstantIndexOp>()) 62 return options.useAlloca 63 ? memref_alloca(MemRefType::get(width * cst.getValue(), 64 IntegerType::get(ctx, 8)), 65 ValueRange{}, alignment_attr) 66 .value 67 : memref_alloc(MemRefType::get(width * cst.getValue(), 68 IntegerType::get(ctx, 8)), 69 ValueRange{}, alignment_attr) 70 .value; 71 Value mul = 72 folded_std_muli(folder, folded_std_constant_index(folder, width), size); 73 return options.useAlloca 74 ? memref_alloca(MemRefType::get(-1, IntegerType::get(ctx, 8)), mul, 75 alignment_attr) 76 .value 77 : memref_alloc(MemRefType::get(-1, IntegerType::get(ctx, 8)), mul, 78 alignment_attr) 79 .value; 80 } 81 82 /// Default allocation callback function. This allocates a promoted buffer when 83 /// no call back to do so is provided. The default is to allocate a 84 /// memref<..xi8> and return a view to get a memref type of shape 85 /// boundingSubViewSize. 86 static Optional<Value> 87 defaultAllocBufferCallBack(const LinalgPromotionOptions &options, 88 OpBuilder &builder, memref::SubViewOp subView, 89 ArrayRef<Value> boundingSubViewSize, 90 bool dynamicBuffers, Optional<unsigned> alignment, 91 OperationFolder *folder) { 92 ShapedType viewType = subView.getType(); 93 int64_t rank = viewType.getRank(); 94 (void)rank; 95 assert(rank > 0 && boundingSubViewSize.size() == static_cast<size_t>(rank)); 96 auto zero = folded_std_constant_index(folder, 0); 97 auto one = folded_std_constant_index(folder, 1); 98 99 Value allocSize = one; 100 for (auto size : llvm::enumerate(boundingSubViewSize)) 101 allocSize = folded_std_muli(folder, allocSize, size.value()); 102 Value buffer = allocBuffer(options, viewType.getElementType(), allocSize, 103 dynamicBuffers, folder, alignment); 104 SmallVector<int64_t, 4> dynSizes(boundingSubViewSize.size(), 105 ShapedType::kDynamicSize); 106 Value view = folded_memref_view( 107 folder, MemRefType::get(dynSizes, viewType.getElementType()), buffer, 108 zero, boundingSubViewSize); 109 return view; 110 } 111 112 /// Default implementation of deallocation of the buffer use for promotion. It 113 /// expects to get the same value that the default allocation method returned, 114 /// i.e. result of a ViewOp. 115 static LogicalResult 116 defaultDeallocBufferCallBack(const LinalgPromotionOptions &options, 117 OpBuilder &b, Value fullLocalView) { 118 auto viewOp = fullLocalView.getDefiningOp<memref::ViewOp>(); 119 assert(viewOp && "expected full local view to be a ViewOp"); 120 if (!options.useAlloca) 121 memref_dealloc(viewOp.source()); 122 return success(); 123 } 124 125 namespace { 126 127 /// Helper struct that captures the information required to apply the 128 /// transformation on each op. This bridges the abstraction gap with the 129 /// user-facing API which exposes positional arguments to control which operands 130 /// are promoted. 131 struct LinalgOpInstancePromotionOptions { 132 LinalgOpInstancePromotionOptions(LinalgOp op, 133 const LinalgPromotionOptions &options); 134 /// SubViews to promote. 135 MapVector<unsigned, Value> subViews; 136 /// True if the full view should be used for the promoted buffer. 137 DenseMap<Value, bool> useFullTileBuffers; 138 139 /// Callback functions for allocation and deallocation of promoted buffers, as 140 /// well as to copy the data into and out of these buffers. 141 AllocBufferCallbackFn allocationFn; 142 DeallocBufferCallbackFn deallocationFn; 143 CopyCallbackFn copyInFn; 144 CopyCallbackFn copyOutFn; 145 146 /// Allow the use of dynamically-sized buffers. 147 bool dynamicBuffers; 148 /// Alignment of promoted buffer. 149 Optional<unsigned> alignment; 150 }; 151 } // namespace 152 153 LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions( 154 LinalgOp linalgOp, const LinalgPromotionOptions &options) 155 : subViews(), dynamicBuffers(options.dynamicBuffers), 156 alignment(options.alignment) { 157 assert(linalgOp.hasBufferSemantics() && "revisit usage of shaped operand"); 158 unsigned nBuffers = linalgOp.getNumShapedOperands(); 159 auto vUseFullTileBuffers = 160 options.useFullTileBuffers.getValueOr(llvm::SmallBitVector()); 161 vUseFullTileBuffers.resize(nBuffers, options.useFullTileBuffersDefault); 162 163 for (unsigned idx = 0; idx != nBuffers; ++idx) { 164 if (options.operandsToPromote && !options.operandsToPromote->count(idx)) 165 continue; 166 auto *op = linalgOp.getShapedOperand(idx).getDefiningOp(); 167 if (auto sv = dyn_cast_or_null<memref::SubViewOp>(op)) { 168 subViews[idx] = sv; 169 useFullTileBuffers[sv] = vUseFullTileBuffers[idx]; 170 } 171 } 172 173 allocationFn = (options.allocationFn 174 ? *(options.allocationFn) 175 : [&](OpBuilder &builder, memref::SubViewOp subViewOp, 176 ArrayRef<Value> boundingSubViewSize, 177 OperationFolder *folder) -> Optional<Value> { 178 return defaultAllocBufferCallBack(options, builder, subViewOp, 179 boundingSubViewSize, dynamicBuffers, 180 alignment, folder); 181 }); 182 deallocationFn = 183 (options.deallocationFn 184 ? *(options.deallocationFn) 185 : [&](OpBuilder &b, Value buffer) { 186 return defaultDeallocBufferCallBack(options, b, buffer); 187 }); 188 auto defaultCopyCallBack = [&](OpBuilder &builder, Value src, 189 Value dst) -> LogicalResult { 190 linalg_copy(src, dst); 191 return success(); 192 }; 193 copyInFn = (options.copyInFn ? *(options.copyInFn) : defaultCopyCallBack); 194 copyOutFn = (options.copyOutFn ? *(options.copyOutFn) : defaultCopyCallBack); 195 } 196 197 // Performs promotion of a `subView` into a local buffer of the size of the 198 // *ranges* of the `subView`. This produces a buffer whose size may be bigger 199 // than the actual size of the `subView` at the boundaries. 200 // This is related to the full/partial tile problem. 201 // Returns a PromotionInfo containing a `buffer`, `fullLocalView` and 202 // `partialLocalView` such that: 203 // * `buffer` is always the size of the full tile. 204 // * `fullLocalView` is a dense contiguous view into that buffer. 205 // * `partialLocalView` is a dense non-contiguous slice of `fullLocalView` 206 // that corresponds to the size of `subView` and accounting for boundary 207 // effects. 208 // The point of the full tile buffer is that constant static tile sizes are 209 // folded and result in a buffer type with statically known size and alignment 210 // properties. 211 // To account for general boundary effects, padding must be performed on the 212 // boundary tiles. For now this is done with an unconditional `fill` op followed 213 // by a partial `copy` op. 214 Optional<PromotionInfo> mlir::linalg::promoteSubviewAsNewBuffer( 215 OpBuilder &b, Location loc, memref::SubViewOp subView, 216 AllocBufferCallbackFn allocationFn, OperationFolder *folder) { 217 ScopedContext scopedContext(b, loc); 218 auto viewType = subView.getType(); 219 auto rank = viewType.getRank(); 220 SmallVector<Value, 4> fullSizes; 221 SmallVector<OpFoldResult> partialSizes; 222 fullSizes.reserve(rank); 223 partialSizes.reserve(rank); 224 for (auto en : llvm::enumerate(subView.getOrCreateRanges(b, loc))) { 225 auto rangeValue = en.value(); 226 // Try to extract a tight constant. 227 LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n"); 228 IntegerAttr sizeAttr = getSmallestBoundingIndex(rangeValue.size); 229 Value size = 230 (!sizeAttr) ? rangeValue.size : b.create<ConstantOp>(loc, sizeAttr); 231 LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n"); 232 fullSizes.push_back(size); 233 partialSizes.push_back( 234 folded_memref_dim(folder, subView, en.index()).value); 235 } 236 SmallVector<int64_t, 4> dynSizes(fullSizes.size(), -1); 237 // If a callback is not specified, then use the default implementation for 238 // allocating the promoted buffer. 239 Optional<Value> fullLocalView = allocationFn(b, subView, fullSizes, folder); 240 if (!fullLocalView) 241 return {}; 242 SmallVector<OpFoldResult, 4> zeros(fullSizes.size(), b.getIndexAttr(0)); 243 SmallVector<OpFoldResult, 4> ones(fullSizes.size(), b.getIndexAttr(1)); 244 auto partialLocalView = 245 folded_memref_subview(folder, *fullLocalView, zeros, partialSizes, ones); 246 return PromotionInfo{*fullLocalView, partialLocalView}; 247 } 248 249 static Optional<MapVector<unsigned, PromotionInfo>> 250 promoteSubViews(OpBuilder &b, Location loc, 251 LinalgOpInstancePromotionOptions options, 252 OperationFolder *folder) { 253 if (options.subViews.empty()) 254 return {}; 255 256 ScopedContext scope(b, loc); 257 MapVector<unsigned, PromotionInfo> promotionInfoMap; 258 259 for (auto v : options.subViews) { 260 memref::SubViewOp subView = 261 cast<memref::SubViewOp>(v.second.getDefiningOp()); 262 Optional<PromotionInfo> promotionInfo = promoteSubviewAsNewBuffer( 263 b, loc, subView, options.allocationFn, folder); 264 if (!promotionInfo) 265 return {}; 266 promotionInfoMap[v.first] = *promotionInfo; 267 268 // Only fill the buffer if the full local view is used 269 if (!options.useFullTileBuffers[v.second]) 270 continue; 271 Value fillVal; 272 if (auto t = subView.getType().getElementType().dyn_cast<FloatType>()) 273 fillVal = folded_std_constant(folder, FloatAttr::get(t, 0.0)); 274 else if (auto t = 275 subView.getType().getElementType().dyn_cast<IntegerType>()) 276 fillVal = folded_std_constant_int(folder, 0, t); 277 linalg_fill(promotionInfo->fullLocalView, fillVal); 278 } 279 280 // Copy data into the promoted buffers. Use callback if provided. 281 for (auto v : options.subViews) { 282 auto info = promotionInfoMap.find(v.first); 283 if (info == promotionInfoMap.end()) 284 continue; 285 if (failed(options.copyInFn( 286 b, cast<memref::SubViewOp>(v.second.getDefiningOp()), 287 info->second.partialLocalView))) 288 return {}; 289 } 290 return promotionInfoMap; 291 } 292 293 static Optional<LinalgOp> 294 promoteSubViews(OpBuilder &b, LinalgOp op, 295 LinalgOpInstancePromotionOptions options, 296 OperationFolder *folder) { 297 assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); 298 299 if (auto convOp = dyn_cast<linalg::ConvOp>(op.getOperation())) { 300 // TODO: add a level of indirection to linalg.generic. 301 if (convOp.padding()) 302 return {}; 303 } 304 305 // 1. Promote the specified views and use them in the new op. 306 auto loc = op.getLoc(); 307 auto promotedBuffersAndViews = promoteSubViews(b, loc, options, folder); 308 if (!promotedBuffersAndViews || 309 promotedBuffersAndViews->size() != options.subViews.size()) 310 return {}; 311 312 // 2. Append all other operands as they appear, this enforces that such 313 // operands are not views. This is to support cases such as FillOp taking 314 // extra scalars etc. Keep a reference to output buffers; 315 SmallVector<Value, 8> opViews; 316 opViews.reserve(op.getNumShapedOperands()); 317 SmallVector<std::pair<Value, Value>, 8> writebackViews; 318 writebackViews.reserve(promotedBuffersAndViews->size()); 319 for (auto view : llvm::enumerate(op.getShapedOperands())) { 320 if (options.subViews.count(view.index()) != 0) { 321 if (options.useFullTileBuffers[view.value()]) 322 opViews.push_back( 323 (*promotedBuffersAndViews)[view.index()].fullLocalView); 324 else 325 opViews.push_back( 326 (*promotedBuffersAndViews)[view.index()].partialLocalView); 327 if (view.index() >= op.getNumInputs()) 328 writebackViews.emplace_back(std::make_pair( 329 view.value(), 330 (*promotedBuffersAndViews)[view.index()].partialLocalView)); 331 } else { 332 opViews.push_back(view.value()); 333 } 334 } 335 op->setOperands(0, opViews.size(), opViews); 336 337 OpBuilder::InsertionGuard guard(b); 338 b.setInsertionPointAfter(op); 339 ScopedContext scope(b, loc); 340 // 3. Emit write-back for the promoted output views: copy the partial view. 341 for (auto viewAndPartialLocalView : writebackViews) { 342 if (failed(options.copyOutFn(b, viewAndPartialLocalView.second, 343 viewAndPartialLocalView.first))) 344 return {}; 345 } 346 347 // 4. Dealloc all local buffers. 348 for (const auto &pi : *promotedBuffersAndViews) 349 (void)options.deallocationFn(b, pi.second.fullLocalView); 350 return op; 351 } 352 353 LogicalResult 354 mlir::linalg::promoteSubviewsPrecondition(Operation *op, 355 LinalgPromotionOptions options) { 356 LinalgOp linOp = dyn_cast<LinalgOp>(op); 357 // Transformation applies to buffers only. 358 if (!linOp || !linOp.hasBufferSemantics()) 359 return failure(); 360 // Check that at least one of the requested operands is indeed a subview. 361 for (auto en : llvm::enumerate(linOp.getShapedOperands())) { 362 auto sv = isa_and_nonnull<memref::SubViewOp>(en.value().getDefiningOp()); 363 if (sv) { 364 if (!options.operandsToPromote.hasValue() || 365 options.operandsToPromote->count(en.index())) 366 return success(); 367 } 368 } 369 // TODO: Check all subviews requested are bound by a static constant. 370 // TODO: Check that the total footprint fits within a given size. 371 return failure(); 372 } 373 374 Optional<LinalgOp> mlir::linalg::promoteSubViews(OpBuilder &b, 375 LinalgOp linalgOp, 376 LinalgPromotionOptions options, 377 OperationFolder *folder) { 378 LinalgOpInstancePromotionOptions linalgOptions(linalgOp, options); 379 return ::promoteSubViews( 380 b, linalgOp, LinalgOpInstancePromotionOptions(linalgOp, options), folder); 381 } 382 383 namespace { 384 struct LinalgPromotionPass : public LinalgPromotionBase<LinalgPromotionPass> { 385 LinalgPromotionPass() = default; 386 LinalgPromotionPass(bool dynamicBuffers, bool useAlloca) { 387 this->dynamicBuffers = dynamicBuffers; 388 this->useAlloca = useAlloca; 389 } 390 391 void runOnFunction() override { 392 OperationFolder folder(&getContext()); 393 getFunction().walk([this, &folder](LinalgOp op) { 394 auto options = LinalgPromotionOptions() 395 .setDynamicBuffers(dynamicBuffers) 396 .setUseAlloca(useAlloca); 397 if (failed(promoteSubviewsPrecondition(op, options))) 398 return; 399 LLVM_DEBUG(llvm::dbgs() << "Promote: " << *(op.getOperation()) << "\n"); 400 OpBuilder b(op); 401 promoteSubViews(b, op, options, &folder); 402 }); 403 } 404 }; 405 } // namespace 406 407 // TODO: support more transformation options in the pass. 408 std::unique_ptr<OperationPass<FuncOp>> 409 mlir::createLinalgPromotionPass(bool dynamicBuffers, bool useAlloca) { 410 return std::make_unique<LinalgPromotionPass>(dynamicBuffers, useAlloca); 411 } 412 std::unique_ptr<OperationPass<FuncOp>> mlir::createLinalgPromotionPass() { 413 return std::make_unique<LinalgPromotionPass>(); 414 } 415