1 //===- Promotion.cpp - Implementation of linalg Promotion -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the linalg dialect Promotion pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PassDetail.h" 14 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" 15 #include "mlir/Dialect/Complex/IR/Complex.h" 16 #include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h" 17 #include "mlir/Dialect/Linalg/IR/LinalgOps.h" 18 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" 19 #include "mlir/Dialect/Linalg/Passes.h" 20 #include "mlir/Dialect/Linalg/Transforms/Transforms.h" 21 #include "mlir/Dialect/Linalg/Utils/Utils.h" 22 #include "mlir/Dialect/MemRef/EDSC/Intrinsics.h" 23 #include "mlir/Dialect/SCF/SCF.h" 24 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" 25 #include "mlir/IR/AffineExpr.h" 26 #include "mlir/IR/AffineExprVisitor.h" 27 #include "mlir/IR/AffineMap.h" 28 #include "mlir/Support/LLVM.h" 29 #include "mlir/Transforms/FoldUtils.h" 30 #include "llvm/ADT/MapVector.h" 31 #include "llvm/Support/CommandLine.h" 32 33 using namespace mlir; 34 using namespace mlir::edsc; 35 using namespace mlir::edsc::intrinsics; 36 using namespace mlir::linalg; 37 using namespace mlir::scf; 38 39 using llvm::MapVector; 40 41 using folded_affine_min = FoldedValueBuilder<AffineMinOp>; 42 using folded_linalg_range = FoldedValueBuilder<linalg::RangeOp>; 43 using folded_memref_dim = FoldedValueBuilder<memref::DimOp>; 44 using folded_memref_subview = FoldedValueBuilder<memref::SubViewOp>; 45 using folded_memref_view = FoldedValueBuilder<memref::ViewOp>; 46 47 #define DEBUG_TYPE "linalg-promotion" 48 49 /// Alloc a new buffer of `size`. If `dynamicBuffers` is true allocate exactly 50 /// the size needed, otherwise try to allocate a static bounding box. 51 static Value allocBuffer(const LinalgPromotionOptions &options, 52 Type elementType, Value size, bool dynamicBuffers, 53 DataLayout &layout, OperationFolder *folder, 54 Optional<unsigned> alignment = None) { 55 auto *ctx = size.getContext(); 56 auto width = layout.getTypeSize(elementType); 57 IntegerAttr alignment_attr; 58 if (alignment.hasValue()) 59 alignment_attr = 60 IntegerAttr::get(IntegerType::get(ctx, 64), alignment.getValue()); 61 if (!dynamicBuffers) 62 if (auto cst = size.getDefiningOp<ConstantIndexOp>()) 63 return options.useAlloca 64 ? memref_alloca(MemRefType::get(width * cst.getValue(), 65 IntegerType::get(ctx, 8)), 66 ValueRange{}, alignment_attr) 67 .value 68 : memref_alloc(MemRefType::get(width * cst.getValue(), 69 IntegerType::get(ctx, 8)), 70 ValueRange{}, alignment_attr) 71 .value; 72 Value mul = 73 folded_std_muli(folder, folded_std_constant_index(folder, width), size); 74 return options.useAlloca 75 ? memref_alloca(MemRefType::get(-1, IntegerType::get(ctx, 8)), mul, 76 alignment_attr) 77 .value 78 : memref_alloc(MemRefType::get(-1, IntegerType::get(ctx, 8)), mul, 79 alignment_attr) 80 .value; 81 } 82 83 /// Default allocation callback function. This allocates a promoted buffer when 84 /// no call back to do so is provided. The default is to allocate a 85 /// memref<..xi8> and return a view to get a memref type of shape 86 /// boundingSubViewSize. 87 static Optional<Value> 88 defaultAllocBufferCallBack(const LinalgPromotionOptions &options, 89 OpBuilder &builder, memref::SubViewOp subView, 90 ArrayRef<Value> boundingSubViewSize, 91 bool dynamicBuffers, Optional<unsigned> alignment, 92 DataLayout &layout, OperationFolder *folder) { 93 ShapedType viewType = subView.getType(); 94 int64_t rank = viewType.getRank(); 95 (void)rank; 96 assert(rank > 0 && boundingSubViewSize.size() == static_cast<size_t>(rank)); 97 auto zero = folded_std_constant_index(folder, 0); 98 auto one = folded_std_constant_index(folder, 1); 99 100 Value allocSize = one; 101 for (auto size : llvm::enumerate(boundingSubViewSize)) 102 allocSize = folded_std_muli(folder, allocSize, size.value()); 103 Value buffer = allocBuffer(options, viewType.getElementType(), allocSize, 104 dynamicBuffers, layout, folder, alignment); 105 SmallVector<int64_t, 4> dynSizes(boundingSubViewSize.size(), 106 ShapedType::kDynamicSize); 107 Value view = folded_memref_view( 108 folder, MemRefType::get(dynSizes, viewType.getElementType()), buffer, 109 zero, boundingSubViewSize); 110 return view; 111 } 112 113 /// Default implementation of deallocation of the buffer use for promotion. It 114 /// expects to get the same value that the default allocation method returned, 115 /// i.e. result of a ViewOp. 116 static LogicalResult 117 defaultDeallocBufferCallBack(const LinalgPromotionOptions &options, 118 OpBuilder &b, Value fullLocalView) { 119 auto viewOp = fullLocalView.getDefiningOp<memref::ViewOp>(); 120 assert(viewOp && "expected full local view to be a ViewOp"); 121 if (!options.useAlloca) 122 memref_dealloc(viewOp.source()); 123 return success(); 124 } 125 126 namespace { 127 128 /// Helper struct that captures the information required to apply the 129 /// transformation on each op. This bridges the abstraction gap with the 130 /// user-facing API which exposes positional arguments to control which operands 131 /// are promoted. 132 struct LinalgOpInstancePromotionOptions { 133 LinalgOpInstancePromotionOptions(LinalgOp op, 134 const LinalgPromotionOptions &options); 135 /// SubViews to promote. 136 MapVector<unsigned, Value> subViews; 137 /// True if the full view should be used for the promoted buffer. 138 DenseMap<Value, bool> useFullTileBuffers; 139 140 /// Callback functions for allocation and deallocation of promoted buffers, as 141 /// well as to copy the data into and out of these buffers. 142 AllocBufferCallbackFn allocationFn; 143 DeallocBufferCallbackFn deallocationFn; 144 CopyCallbackFn copyInFn; 145 CopyCallbackFn copyOutFn; 146 147 /// Allow the use of dynamically-sized buffers. 148 bool dynamicBuffers; 149 /// Alignment of promoted buffer. 150 Optional<unsigned> alignment; 151 }; 152 } // namespace 153 154 LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions( 155 LinalgOp linalgOp, const LinalgPromotionOptions &options) 156 : subViews(), dynamicBuffers(options.dynamicBuffers), 157 alignment(options.alignment) { 158 assert(linalgOp.hasBufferSemantics() && "revisit usage of shaped operand"); 159 unsigned nBuffers = linalgOp.getNumShapedOperands(); 160 auto vUseFullTileBuffers = 161 options.useFullTileBuffers.getValueOr(llvm::SmallBitVector()); 162 vUseFullTileBuffers.resize(nBuffers, options.useFullTileBuffersDefault); 163 164 for (unsigned idx = 0; idx != nBuffers; ++idx) { 165 if (options.operandsToPromote && !options.operandsToPromote->count(idx)) 166 continue; 167 auto *op = linalgOp.getShapedOperand(idx).getDefiningOp(); 168 if (auto sv = dyn_cast_or_null<memref::SubViewOp>(op)) { 169 subViews[idx] = sv; 170 useFullTileBuffers[sv] = vUseFullTileBuffers[idx]; 171 } 172 } 173 174 allocationFn = 175 (options.allocationFn 176 ? *(options.allocationFn) 177 : [&](OpBuilder &builder, memref::SubViewOp subViewOp, 178 ArrayRef<Value> boundingSubViewSize, DataLayout &layout, 179 OperationFolder *folder) -> Optional<Value> { 180 return defaultAllocBufferCallBack(options, builder, subViewOp, 181 boundingSubViewSize, dynamicBuffers, 182 alignment, layout, folder); 183 }); 184 deallocationFn = 185 (options.deallocationFn 186 ? *(options.deallocationFn) 187 : [&](OpBuilder &b, Value buffer) { 188 return defaultDeallocBufferCallBack(options, b, buffer); 189 }); 190 auto defaultCopyCallBack = [&](OpBuilder &builder, Value src, 191 Value dst) -> LogicalResult { 192 linalg_copy(src, dst); 193 return success(); 194 }; 195 copyInFn = (options.copyInFn ? *(options.copyInFn) : defaultCopyCallBack); 196 copyOutFn = (options.copyOutFn ? *(options.copyOutFn) : defaultCopyCallBack); 197 } 198 199 // Performs promotion of a `subView` into a local buffer of the size of the 200 // *ranges* of the `subView`. This produces a buffer whose size may be bigger 201 // than the actual size of the `subView` at the boundaries. 202 // This is related to the full/partial tile problem. 203 // Returns a PromotionInfo containing a `buffer`, `fullLocalView` and 204 // `partialLocalView` such that: 205 // * `buffer` is always the size of the full tile. 206 // * `fullLocalView` is a dense contiguous view into that buffer. 207 // * `partialLocalView` is a dense non-contiguous slice of `fullLocalView` 208 // that corresponds to the size of `subView` and accounting for boundary 209 // effects. 210 // The point of the full tile buffer is that constant static tile sizes are 211 // folded and result in a buffer type with statically known size and alignment 212 // properties. 213 // To account for general boundary effects, padding must be performed on the 214 // boundary tiles. For now this is done with an unconditional `fill` op followed 215 // by a partial `copy` op. 216 Optional<PromotionInfo> mlir::linalg::promoteSubviewAsNewBuffer( 217 OpBuilder &b, Location loc, memref::SubViewOp subView, 218 AllocBufferCallbackFn allocationFn, DataLayout &layout, 219 OperationFolder *folder) { 220 ScopedContext scopedContext(b, loc); 221 auto viewType = subView.getType(); 222 auto rank = viewType.getRank(); 223 SmallVector<Value, 4> fullSizes; 224 SmallVector<OpFoldResult> partialSizes; 225 fullSizes.reserve(rank); 226 partialSizes.reserve(rank); 227 for (auto en : llvm::enumerate(subView.getOrCreateRanges(b, loc))) { 228 auto rangeValue = en.value(); 229 // Try to extract a tight constant. 230 LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n"); 231 IntegerAttr sizeAttr = getSmallestBoundingIndex(rangeValue.size); 232 Value size = 233 (!sizeAttr) ? rangeValue.size : b.create<ConstantOp>(loc, sizeAttr); 234 LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n"); 235 fullSizes.push_back(size); 236 partialSizes.push_back( 237 folded_memref_dim(folder, subView, en.index()).value); 238 } 239 SmallVector<int64_t, 4> dynSizes(fullSizes.size(), -1); 240 // If a callback is not specified, then use the default implementation for 241 // allocating the promoted buffer. 242 Optional<Value> fullLocalView = 243 allocationFn(b, subView, fullSizes, layout, folder); 244 if (!fullLocalView) 245 return {}; 246 SmallVector<OpFoldResult, 4> zeros(fullSizes.size(), b.getIndexAttr(0)); 247 SmallVector<OpFoldResult, 4> ones(fullSizes.size(), b.getIndexAttr(1)); 248 auto partialLocalView = 249 folded_memref_subview(folder, *fullLocalView, zeros, partialSizes, ones); 250 return PromotionInfo{*fullLocalView, partialLocalView}; 251 } 252 253 static Optional<MapVector<unsigned, PromotionInfo>> 254 promoteSubViews(OpBuilder &b, Location loc, 255 LinalgOpInstancePromotionOptions options, DataLayout &layout, 256 OperationFolder *folder) { 257 if (options.subViews.empty()) 258 return {}; 259 260 ScopedContext scope(b, loc); 261 MapVector<unsigned, PromotionInfo> promotionInfoMap; 262 263 for (auto v : options.subViews) { 264 memref::SubViewOp subView = 265 cast<memref::SubViewOp>(v.second.getDefiningOp()); 266 Optional<PromotionInfo> promotionInfo = promoteSubviewAsNewBuffer( 267 b, loc, subView, options.allocationFn, layout, folder); 268 if (!promotionInfo) 269 return {}; 270 promotionInfoMap[v.first] = *promotionInfo; 271 272 // Only fill the buffer if the full local view is used 273 if (!options.useFullTileBuffers[v.second]) 274 continue; 275 Value fillVal; 276 if (auto t = subView.getType().getElementType().dyn_cast<FloatType>()) { 277 fillVal = folded_std_constant(folder, FloatAttr::get(t, 0.0)); 278 } else if (auto t = 279 subView.getType().getElementType().dyn_cast<IntegerType>()) { 280 fillVal = folded_std_constant_int(folder, 0, t); 281 } else if (auto t = 282 subView.getType().getElementType().dyn_cast<ComplexType>()) { 283 if (auto et = t.getElementType().dyn_cast<FloatType>()) 284 fillVal = folded_std_constant(folder, FloatAttr::get(et, 0.0)); 285 else if (auto et = t.getElementType().cast<IntegerType>()) 286 fillVal = folded_std_constant_int(folder, 0, et); 287 fillVal = b.create<complex::CreateOp>(loc, t, fillVal, fillVal); 288 } else { 289 return {}; 290 } 291 linalg_fill(promotionInfo->fullLocalView, fillVal); 292 } 293 294 // Copy data into the promoted buffers. Use callback if provided. 295 for (auto v : options.subViews) { 296 auto info = promotionInfoMap.find(v.first); 297 if (info == promotionInfoMap.end()) 298 continue; 299 if (failed(options.copyInFn( 300 b, cast<memref::SubViewOp>(v.second.getDefiningOp()), 301 info->second.partialLocalView))) 302 return {}; 303 } 304 return promotionInfoMap; 305 } 306 307 static Optional<LinalgOp> 308 promoteSubViews(OpBuilder &b, LinalgOp op, 309 LinalgOpInstancePromotionOptions options, DataLayout &layout, 310 OperationFolder *folder) { 311 assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); 312 313 if (auto convOp = dyn_cast<linalg::ConvOp>(op.getOperation())) { 314 // TODO: add a level of indirection to linalg.generic. 315 if (convOp.padding()) 316 return {}; 317 } 318 319 // 1. Promote the specified views and use them in the new op. 320 auto loc = op.getLoc(); 321 auto promotedBuffersAndViews = 322 promoteSubViews(b, loc, options, layout, folder); 323 if (!promotedBuffersAndViews || 324 promotedBuffersAndViews->size() != options.subViews.size()) 325 return {}; 326 327 // 2. Append all other operands as they appear, this enforces that such 328 // operands are not views. This is to support cases such as FillOp taking 329 // extra scalars etc. Keep a reference to output buffers; 330 SmallVector<Value, 8> opViews; 331 opViews.reserve(op.getNumShapedOperands()); 332 SmallVector<std::pair<Value, Value>, 8> writebackViews; 333 writebackViews.reserve(promotedBuffersAndViews->size()); 334 for (auto view : llvm::enumerate(op.getShapedOperands())) { 335 if (options.subViews.count(view.index()) != 0) { 336 if (options.useFullTileBuffers[view.value()]) 337 opViews.push_back( 338 (*promotedBuffersAndViews)[view.index()].fullLocalView); 339 else 340 opViews.push_back( 341 (*promotedBuffersAndViews)[view.index()].partialLocalView); 342 if (view.index() >= op.getNumInputs()) 343 writebackViews.emplace_back(std::make_pair( 344 view.value(), 345 (*promotedBuffersAndViews)[view.index()].partialLocalView)); 346 } else { 347 opViews.push_back(view.value()); 348 } 349 } 350 op->setOperands(0, opViews.size(), opViews); 351 352 OpBuilder::InsertionGuard guard(b); 353 b.setInsertionPointAfter(op); 354 ScopedContext scope(b, loc); 355 // 3. Emit write-back for the promoted output views: copy the partial view. 356 for (auto viewAndPartialLocalView : writebackViews) { 357 if (failed(options.copyOutFn(b, viewAndPartialLocalView.second, 358 viewAndPartialLocalView.first))) 359 return {}; 360 } 361 362 // 4. Dealloc all local buffers. 363 for (const auto &pi : *promotedBuffersAndViews) 364 (void)options.deallocationFn(b, pi.second.fullLocalView); 365 return op; 366 } 367 368 LogicalResult 369 mlir::linalg::promoteSubviewsPrecondition(Operation *op, 370 LinalgPromotionOptions options) { 371 LinalgOp linOp = dyn_cast<LinalgOp>(op); 372 // Transformation applies to buffers only. 373 if (!linOp || !linOp.hasBufferSemantics()) 374 return failure(); 375 // Check that at least one of the requested operands is indeed a subview. 376 for (auto en : llvm::enumerate(linOp.getShapedOperands())) { 377 auto sv = isa_and_nonnull<memref::SubViewOp>(en.value().getDefiningOp()); 378 if (sv) { 379 if (!options.operandsToPromote.hasValue() || 380 options.operandsToPromote->count(en.index())) 381 return success(); 382 } 383 } 384 // TODO: Check all subviews requested are bound by a static constant. 385 // TODO: Check that the total footprint fits within a given size. 386 return failure(); 387 } 388 389 Optional<LinalgOp> mlir::linalg::promoteSubViews(OpBuilder &b, 390 LinalgOp linalgOp, 391 LinalgPromotionOptions options, 392 OperationFolder *folder) { 393 LinalgOpInstancePromotionOptions linalgOptions(linalgOp, options); 394 auto layout = DataLayout::closest(linalgOp); 395 return ::promoteSubViews(b, linalgOp, linalgOptions, layout, folder); 396 } 397 398 namespace { 399 struct LinalgPromotionPass : public LinalgPromotionBase<LinalgPromotionPass> { 400 LinalgPromotionPass() = default; 401 LinalgPromotionPass(bool dynamicBuffers, bool useAlloca) { 402 this->dynamicBuffers = dynamicBuffers; 403 this->useAlloca = useAlloca; 404 } 405 406 void runOnFunction() override { 407 OperationFolder folder(&getContext()); 408 getFunction().walk([this, &folder](LinalgOp op) { 409 auto options = LinalgPromotionOptions() 410 .setDynamicBuffers(dynamicBuffers) 411 .setUseAlloca(useAlloca); 412 if (failed(promoteSubviewsPrecondition(op, options))) 413 return; 414 LLVM_DEBUG(llvm::dbgs() << "Promote: " << *(op.getOperation()) << "\n"); 415 OpBuilder b(op); 416 promoteSubViews(b, op, options, &folder); 417 }); 418 } 419 }; 420 } // namespace 421 422 // TODO: support more transformation options in the pass. 423 std::unique_ptr<OperationPass<FuncOp>> 424 mlir::createLinalgPromotionPass(bool dynamicBuffers, bool useAlloca) { 425 return std::make_unique<LinalgPromotionPass>(dynamicBuffers, useAlloca); 426 } 427 std::unique_ptr<OperationPass<FuncOp>> mlir::createLinalgPromotionPass() { 428 return std::make_unique<LinalgPromotionPass>(); 429 } 430