1 //===- Promotion.cpp - Implementation of linalg Promotion -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the linalg dialect Promotion pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PassDetail.h" 14 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" 15 #include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h" 16 #include "mlir/Dialect/Linalg/IR/LinalgOps.h" 17 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" 18 #include "mlir/Dialect/Linalg/Passes.h" 19 #include "mlir/Dialect/Linalg/Transforms/Transforms.h" 20 #include "mlir/Dialect/Linalg/Utils/Utils.h" 21 #include "mlir/Dialect/SCF/SCF.h" 22 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" 23 #include "mlir/IR/AffineExpr.h" 24 #include "mlir/IR/AffineExprVisitor.h" 25 #include "mlir/IR/AffineMap.h" 26 #include "mlir/Support/LLVM.h" 27 #include "mlir/Transforms/FoldUtils.h" 28 29 #include "llvm/ADT/SetVector.h" 30 #include "llvm/Support/CommandLine.h" 31 32 using namespace mlir; 33 using namespace mlir::edsc; 34 using namespace mlir::edsc::intrinsics; 35 using namespace mlir::linalg; 36 using namespace mlir::scf; 37 38 using llvm::SetVector; 39 40 using folded_affine_min = FoldedValueBuilder<AffineMinOp>; 41 using folded_linalg_range = FoldedValueBuilder<linalg::RangeOp>; 42 using folded_std_dim = FoldedValueBuilder<DimOp>; 43 using folded_std_subview = FoldedValueBuilder<SubViewOp>; 44 using folded_std_view = FoldedValueBuilder<ViewOp>; 45 46 #define DEBUG_TYPE "linalg-promotion" 47 48 namespace { 49 50 /// Helper struct that captures the information required to apply the 51 /// transformation on each op. This bridges the abstraction gap with the 52 /// user-facing API which exposes positional arguments to control which operands 53 /// are promoted. 54 struct LinalgOpInstancePromotionOptions { 55 LinalgOpInstancePromotionOptions(LinalgOp op, 56 const LinalgPromotionOptions &options); 57 /// SubViews to promote. 58 SetVector<Value> subViews; 59 /// True if the full view should be used for the promoted buffer. 60 DenseMap<Value, bool> useFullTileBuffers; 61 /// Allow the use of dynamicaly-sized buffers. 62 bool dynamicBuffers; 63 /// Alignment of promoted buffer. 64 Optional<unsigned> alignment; 65 }; 66 } // namespace 67 68 LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions( 69 LinalgOp linalgOp, const LinalgPromotionOptions &options) 70 : subViews(), useFullTileBuffers(), dynamicBuffers(options.dynamicBuffers), 71 alignment(options.alignment) { 72 unsigned nBuffers = linalgOp.getNumInputsAndOutputBuffers(); 73 auto vUseFullTileBuffers = 74 options.useFullTileBuffers.getValueOr(llvm::SmallBitVector()); 75 vUseFullTileBuffers.resize(nBuffers, options.useFullTileBuffersDefault); 76 77 if (options.operandsToPromote.hasValue()) { 78 for (auto it : llvm::enumerate(options.operandsToPromote.getValue())) { 79 auto *op = linalgOp.getBuffer(it.value()).getDefiningOp(); 80 if (auto sv = dyn_cast_or_null<SubViewOp>(op)) { 81 subViews.insert(sv); 82 useFullTileBuffers[sv] = vUseFullTileBuffers[it.index()]; 83 } 84 } 85 } else { 86 for (unsigned idx = 0; idx < nBuffers; ++idx) { 87 auto *op = linalgOp.getBuffer(idx).getDefiningOp(); 88 if (auto sv = dyn_cast_or_null<SubViewOp>(op)) { 89 subViews.insert(sv); 90 useFullTileBuffers[sv] = vUseFullTileBuffers[idx]; 91 } 92 } 93 } 94 } 95 96 /// If `size` comes from an AffineMinOp and one of the values of AffineMinOp 97 /// is a constant then return a new value set to the smallest such constant. 98 /// Otherwise return size. 99 static Value extractSmallestConstantBoundingSize(OpBuilder &b, Location loc, 100 Value size) { 101 auto affineMinOp = size.getDefiningOp<AffineMinOp>(); 102 if (!affineMinOp) 103 return size; 104 int64_t minConst = std::numeric_limits<int64_t>::max(); 105 for (auto e : affineMinOp.getAffineMap().getResults()) 106 if (auto cst = e.dyn_cast<AffineConstantExpr>()) 107 minConst = std::min(minConst, cst.getValue()); 108 return (minConst == std::numeric_limits<int64_t>::max()) 109 ? size 110 : b.create<ConstantIndexOp>(loc, minConst); 111 } 112 113 /// Alloc a new buffer of `size`. If `dynamicBuffers` is true allocate exactly 114 /// the size needed, otherwise try to allocate a static bounding box. 115 static Value allocBuffer(Type elementType, Value size, bool dynamicBuffers, 116 OperationFolder *folder, 117 Optional<unsigned> alignment = None) { 118 auto *ctx = size.getContext(); 119 auto width = llvm::divideCeil(elementType.getIntOrFloatBitWidth(), 8); 120 IntegerAttr alignment_attr; 121 if (alignment.hasValue()) 122 alignment_attr = 123 IntegerAttr::get(IntegerType::get(64, ctx), alignment.getValue()); 124 if (!dynamicBuffers) 125 if (auto cst = size.getDefiningOp<ConstantIndexOp>()) 126 return std_alloc( 127 MemRefType::get(width * cst.getValue(), IntegerType::get(8, ctx)), 128 ValueRange{}, alignment_attr); 129 Value mul = 130 folded_std_muli(folder, folded_std_constant_index(folder, width), size); 131 return std_alloc(MemRefType::get(-1, IntegerType::get(8, ctx)), mul, 132 alignment_attr); 133 } 134 135 // Performs promotion of a `subView` into a local buffer of the size of the 136 // *ranges* of the `subView`. This produces a buffer whose size may be bigger 137 // than the actual size of the `subView` at the boundaries. 138 // This is related to the full/partial tile problem. 139 // Returns a PromotionInfo containing a `buffer`, `fullLocalView` and 140 // `partialLocalView` such that: 141 // * `buffer` is always the size of the full tile. 142 // * `fullLocalView` is a dense contiguous view into that buffer. 143 // * `partialLocalView` is a dense non-contiguous slice of `fullLocalView` 144 // that corresponds to the size of `subView` and accounting for boundary 145 // effects. 146 // The point of the full tile buffer is that constant static tile sizes are 147 // folded and result in a buffer type with statically known size and alignment 148 // properties. 149 // To account for general boundary effects, padding must be performed on the 150 // boundary tiles. For now this is done with an unconditional `fill` op followed 151 // by a partial `copy` op. 152 static PromotionInfo promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, 153 SubViewOp subView, 154 bool dynamicBuffers, 155 Optional<unsigned> alignment, 156 OperationFolder *folder) { 157 auto zero = folded_std_constant_index(folder, 0); 158 auto one = folded_std_constant_index(folder, 1); 159 160 auto viewType = subView.getType(); 161 auto rank = viewType.getRank(); 162 Value allocSize = one; 163 SmallVector<Value, 8> fullSizes, partialSizes; 164 fullSizes.reserve(rank); 165 partialSizes.reserve(rank); 166 for (auto en : llvm::enumerate(subView.getOrCreateRanges(b, loc))) { 167 auto rank = en.index(); 168 auto rangeValue = en.value(); 169 // Try to extract a tight constant. 170 LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n"); 171 Value size = extractSmallestConstantBoundingSize(b, loc, rangeValue.size); 172 LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n"); 173 allocSize = folded_std_muli(folder, allocSize, size); 174 fullSizes.push_back(size); 175 partialSizes.push_back(folded_std_dim(folder, subView, rank)); 176 } 177 SmallVector<int64_t, 4> dynSizes(fullSizes.size(), -1); 178 auto buffer = allocBuffer(viewType.getElementType(), allocSize, 179 dynamicBuffers, folder, alignment); 180 auto fullLocalView = folded_std_view( 181 folder, MemRefType::get(dynSizes, viewType.getElementType()), buffer, 182 zero, fullSizes); 183 SmallVector<Value, 4> zeros(fullSizes.size(), zero); 184 SmallVector<Value, 4> ones(fullSizes.size(), one); 185 auto partialLocalView = 186 folded_std_subview(folder, fullLocalView, zeros, partialSizes, ones); 187 return PromotionInfo{buffer, fullLocalView, partialLocalView}; 188 } 189 190 static SmallVector<PromotionInfo, 8> 191 promoteSubViews(OpBuilder &b, Location loc, 192 LinalgOpInstancePromotionOptions options, 193 OperationFolder *folder) { 194 if (options.subViews.empty()) 195 return {}; 196 197 ScopedContext scope(b, loc); 198 SmallVector<PromotionInfo, 8> res; 199 res.reserve(options.subViews.size()); 200 DenseMap<Value, PromotionInfo> promotionInfoMap; 201 for (auto v : options.subViews) { 202 SubViewOp subView = cast<SubViewOp>(v.getDefiningOp()); 203 auto promotionInfo = promoteSubviewAsNewBuffer( 204 b, loc, subView, options.dynamicBuffers, options.alignment, folder); 205 promotionInfoMap.insert(std::make_pair(subView.getResult(), promotionInfo)); 206 res.push_back(promotionInfo); 207 } 208 209 for (auto v : options.subViews) { 210 SubViewOp subView = cast<SubViewOp>(v.getDefiningOp()); 211 auto info = promotionInfoMap.find(v); 212 if (info == promotionInfoMap.end()) 213 continue; 214 // Only fill the buffer if the full local view is used 215 if (!options.useFullTileBuffers[v]) 216 continue; 217 Value fillVal; 218 if (auto t = subView.getType().getElementType().dyn_cast<FloatType>()) 219 fillVal = folded_std_constant(folder, FloatAttr::get(t, 0.0)); 220 else if (auto t = 221 subView.getType().getElementType().dyn_cast<IntegerType>()) 222 fillVal = folded_std_constant_int(folder, 0, t); 223 // TODO(ntv): fill is only necessary if `promotionInfo` has a full local 224 // view that is different from the partial local view and we are on the 225 // boundary. 226 linalg_fill(info->second.fullLocalView, fillVal); 227 } 228 229 for (auto v : options.subViews) { 230 auto info = promotionInfoMap.find(v); 231 if (info == promotionInfoMap.end()) 232 continue; 233 linalg_copy(cast<SubViewOp>(v.getDefiningOp()), 234 info->second.partialLocalView); 235 } 236 return res; 237 } 238 239 static void promoteSubViews(OpBuilder &b, LinalgOp op, 240 LinalgOpInstancePromotionOptions options, 241 OperationFolder *folder) { 242 assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); 243 244 if (auto convOp = dyn_cast<linalg::ConvOp>(op.getOperation())) { 245 // TODO(ntv): add a level of indirection to linalg.generic. 246 if (convOp.padding()) 247 llvm_unreachable("Unexpected conv with padding"); 248 } 249 250 // 1. Promote the specified views and use them in the new op. 251 auto loc = op.getLoc(); 252 auto promotedBufferAndViews = promoteSubViews(b, loc, options, folder); 253 SmallVector<Value, 8> opViews; 254 opViews.reserve(op.getNumInputsAndOutputs()); 255 SmallVector<std::pair<Value, Value>, 8> writebackViews; 256 writebackViews.reserve(promotedBufferAndViews.size()); 257 unsigned promotedIdx = 0; 258 for (auto view : op.getInputsAndOutputBuffers()) { 259 if (options.subViews.count(view) != 0) { 260 if (options.useFullTileBuffers[view]) 261 opViews.push_back(promotedBufferAndViews[promotedIdx].fullLocalView); 262 else 263 opViews.push_back(promotedBufferAndViews[promotedIdx].partialLocalView); 264 writebackViews.emplace_back(std::make_pair( 265 view, promotedBufferAndViews[promotedIdx].partialLocalView)); 266 promotedIdx++; 267 } else { 268 opViews.push_back(view); 269 } 270 } 271 272 // 2. Append all other operands as they appear, this enforces that such 273 // operands are not views. This is to support cases such as FillOp taking 274 // extra scalars etc. 275 // Keep a reference to output buffers; 276 DenseSet<Value> originalOutputs(op.getOutputBuffers().begin(), 277 op.getOutputBuffers().end()); 278 op.getOperation()->setOperands(0, opViews.size(), opViews); 279 280 OpBuilder::InsertionGuard guard(b); 281 b.setInsertionPointAfter(op); 282 ScopedContext scope(b, loc); 283 // 3. Emit write-back for the promoted output views: copy the partial view. 284 for (auto viewAndPartialLocalView : writebackViews) 285 if (originalOutputs.count(viewAndPartialLocalView.first)) 286 linalg_copy(viewAndPartialLocalView.second, 287 viewAndPartialLocalView.first); 288 289 // 4. Dealloc all local buffers. 290 for (const auto &pi : promotedBufferAndViews) 291 std_dealloc(pi.buffer); 292 } 293 294 LogicalResult 295 mlir::linalg::promoteSubviewsPrecondition(Operation *op, 296 LinalgPromotionOptions options) { 297 LinalgOp linOp = dyn_cast<LinalgOp>(op); 298 // Transformation applies to buffers only. 299 if (!linOp || !linOp.hasBufferSemantics()) 300 return failure(); 301 // Check that at least one of the requested operands is indeed a subview. 302 for (auto en : llvm::enumerate(linOp.getInputsAndOutputBuffers())) { 303 auto sv = isa_and_nonnull<SubViewOp>(en.value().getDefiningOp()); 304 if (sv) { 305 if (!options.operandsToPromote.hasValue() || 306 options.operandsToPromote->count(en.index())) 307 return success(); 308 } 309 } 310 // TODO: Check all subviews requested are bound by a static constant. 311 // TODO: Check that the total footprint fits within a given size. 312 return failure(); 313 } 314 315 LinalgOp mlir::linalg::promoteSubViews(OpBuilder &b, LinalgOp linalgOp, 316 LinalgPromotionOptions options, 317 OperationFolder *folder) { 318 LinalgOpInstancePromotionOptions linalgOptions(linalgOp, options); 319 ::promoteSubViews( 320 b, linalgOp, LinalgOpInstancePromotionOptions(linalgOp, options), folder); 321 return linalgOp; 322 } 323 324 namespace { 325 struct LinalgPromotionPass : public LinalgPromotionBase<LinalgPromotionPass> { 326 LinalgPromotionPass() = default; 327 LinalgPromotionPass(bool dynamicBuffers) { 328 this->dynamicBuffers = dynamicBuffers; 329 } 330 331 void runOnFunction() override { 332 OperationFolder folder(&getContext()); 333 getFunction().walk([this, &folder](LinalgOp op) { 334 auto options = LinalgPromotionOptions().setDynamicBuffers(dynamicBuffers); 335 if (failed(promoteSubviewsPrecondition(op, options))) 336 return; 337 LLVM_DEBUG(llvm::dbgs() << "Promote: " << *(op.getOperation()) << "\n"); 338 OpBuilder b(op); 339 promoteSubViews(b, op, options, &folder); 340 }); 341 } 342 }; 343 } // namespace 344 345 // TODO: support more transformation options in the pass. 346 std::unique_ptr<OperationPass<FuncOp>> 347 mlir::createLinalgPromotionPass(bool dynamicBuffers) { 348 return std::make_unique<LinalgPromotionPass>(dynamicBuffers); 349 } 350 std::unique_ptr<OperationPass<FuncOp>> mlir::createLinalgPromotionPass() { 351 return std::make_unique<LinalgPromotionPass>(); 352 } 353