1 //===- Promotion.cpp - Implementation of linalg Promotion -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the linalg dialect Promotion pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PassDetail.h" 14 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" 15 #include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h" 16 #include "mlir/Dialect/Linalg/IR/LinalgOps.h" 17 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" 18 #include "mlir/Dialect/Linalg/Passes.h" 19 #include "mlir/Dialect/Linalg/Transforms/Transforms.h" 20 #include "mlir/Dialect/Linalg/Utils/Utils.h" 21 #include "mlir/Dialect/SCF/SCF.h" 22 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" 23 #include "mlir/IR/AffineExpr.h" 24 #include "mlir/IR/AffineExprVisitor.h" 25 #include "mlir/IR/AffineMap.h" 26 #include "mlir/Support/LLVM.h" 27 #include "mlir/Transforms/FoldUtils.h" 28 29 #include "llvm/ADT/SetVector.h" 30 #include "llvm/Support/CommandLine.h" 31 32 using namespace mlir; 33 using namespace mlir::edsc; 34 using namespace mlir::edsc::intrinsics; 35 using namespace mlir::linalg; 36 using namespace mlir::scf; 37 38 using llvm::SetVector; 39 40 using folded_affine_min = FoldedValueBuilder<AffineMinOp>; 41 using folded_linalg_range = FoldedValueBuilder<linalg::RangeOp>; 42 using folded_std_dim = FoldedValueBuilder<DimOp>; 43 using folded_std_subview = FoldedValueBuilder<SubViewOp>; 44 using folded_std_view = FoldedValueBuilder<ViewOp>; 45 46 #define DEBUG_TYPE "linalg-promotion" 47 48 namespace { 49 50 /// Helper struct that captures the information required to apply the 51 /// transformation on each op. This bridges the abstraction gap with the 52 /// user-facing API which exposes positional arguments to control which operands 53 /// are promoted. 54 struct LinalgOpInstancePromotionOptions { 55 LinalgOpInstancePromotionOptions(LinalgOp op, 56 const LinalgPromotionOptions &options); 57 /// SubViews to promote. 58 SetVector<Value> subViews; 59 /// Allow the use of dynamicaly-sized buffers. 60 bool dynamicBuffers; 61 /// Alignment of promoted buffer. 62 Optional<unsigned> alignment; 63 }; 64 } // namespace 65 66 LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions( 67 LinalgOp linalgOp, const LinalgPromotionOptions &options) 68 : subViews(), dynamicBuffers(options.dynamicBuffers), 69 alignment(options.alignment) { 70 if (options.operandsToPromote.hasValue()) { 71 for (unsigned idx : options.operandsToPromote.getValue()) { 72 auto *op = linalgOp.getBuffer(idx).getDefiningOp(); 73 if (auto sv = dyn_cast_or_null<SubViewOp>(op)) 74 subViews.insert(sv); 75 } 76 } else { 77 unsigned nBuffers = linalgOp.getNumInputsAndOutputBuffers(); 78 for (unsigned idx = 0; idx < nBuffers; ++idx) { 79 auto *op = linalgOp.getBuffer(idx).getDefiningOp(); 80 if (auto sv = dyn_cast_or_null<SubViewOp>(op)) 81 subViews.insert(sv); 82 } 83 } 84 } 85 86 /// If `size` comes from an AffineMinOp and one of the values of AffineMinOp 87 /// is a constant then return a new value set to the smallest such constant. 88 /// Otherwise return size. 89 static Value extractSmallestConstantBoundingSize(OpBuilder &b, Location loc, 90 Value size) { 91 auto affineMinOp = size.getDefiningOp<AffineMinOp>(); 92 if (!affineMinOp) 93 return size; 94 int64_t minConst = std::numeric_limits<int64_t>::max(); 95 for (auto e : affineMinOp.getAffineMap().getResults()) 96 if (auto cst = e.dyn_cast<AffineConstantExpr>()) 97 minConst = std::min(minConst, cst.getValue()); 98 return (minConst == std::numeric_limits<int64_t>::max()) 99 ? size 100 : b.create<ConstantIndexOp>(loc, minConst); 101 } 102 103 /// Alloc a new buffer of `size`. If `dynamicBuffers` is true allocate exactly 104 /// the size needed, otherwise try to allocate a static bounding box. 105 static Value allocBuffer(Type elementType, Value size, bool dynamicBuffers, 106 OperationFolder *folder, 107 Optional<unsigned> alignment = None) { 108 auto *ctx = size.getContext(); 109 auto width = llvm::divideCeil(elementType.getIntOrFloatBitWidth(), 8); 110 IntegerAttr alignment_attr; 111 if (alignment.hasValue()) 112 alignment_attr = 113 IntegerAttr::get(IntegerType::get(64, ctx), alignment.getValue()); 114 if (!dynamicBuffers) 115 if (auto cst = size.getDefiningOp<ConstantIndexOp>()) 116 return std_alloc( 117 MemRefType::get(width * cst.getValue(), IntegerType::get(8, ctx)), 118 ValueRange{}, alignment_attr); 119 Value mul = 120 folded_std_muli(folder, folded_std_constant_index(folder, width), size); 121 return std_alloc(MemRefType::get(-1, IntegerType::get(8, ctx)), mul, 122 alignment_attr); 123 } 124 125 // Performs promotion of a `subView` into a local buffer of the size of the 126 // *ranges* of the `subView`. This produces a buffer whose size may be bigger 127 // than the actual size of the `subView` at the boundaries. 128 // This is related to the full/partial tile problem. 129 // Returns a PromotionInfo containing a `buffer`, `fullLocalView` and 130 // `partialLocalView` such that: 131 // * `buffer` is always the size of the full tile. 132 // * `fullLocalView` is a dense contiguous view into that buffer. 133 // * `partialLocalView` is a dense non-contiguous slice of `fullLocalView` 134 // that corresponds to the size of `subView` and accounting for boundary 135 // effects. 136 // The point of the full tile buffer is that constant static tile sizes are 137 // folded and result in a buffer type with statically known size and alignment 138 // properties. 139 // To account for general boundary effects, padding must be performed on the 140 // boundary tiles. For now this is done with an unconditional `fill` op followed 141 // by a partial `copy` op. 142 static PromotionInfo promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, 143 SubViewOp subView, 144 bool dynamicBuffers, 145 Optional<unsigned> alignment, 146 OperationFolder *folder) { 147 auto zero = folded_std_constant_index(folder, 0); 148 auto one = folded_std_constant_index(folder, 1); 149 150 auto viewType = subView.getType(); 151 auto rank = viewType.getRank(); 152 Value allocSize = one; 153 SmallVector<Value, 8> fullSizes, partialSizes; 154 fullSizes.reserve(rank); 155 partialSizes.reserve(rank); 156 for (auto en : llvm::enumerate(subView.getOrCreateRanges(b, loc))) { 157 auto rank = en.index(); 158 auto rangeValue = en.value(); 159 // Try to extract a tight constant. 160 LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n"); 161 Value size = extractSmallestConstantBoundingSize(b, loc, rangeValue.size); 162 LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n"); 163 allocSize = folded_std_muli(folder, allocSize, size); 164 fullSizes.push_back(size); 165 partialSizes.push_back(folded_std_dim(folder, subView, rank)); 166 } 167 SmallVector<int64_t, 4> dynSizes(fullSizes.size(), -1); 168 auto buffer = allocBuffer(viewType.getElementType(), allocSize, 169 dynamicBuffers, folder, alignment); 170 auto fullLocalView = folded_std_view( 171 folder, MemRefType::get(dynSizes, viewType.getElementType()), buffer, 172 zero, fullSizes); 173 SmallVector<Value, 4> zeros(fullSizes.size(), zero); 174 SmallVector<Value, 4> ones(fullSizes.size(), one); 175 auto partialLocalView = 176 folded_std_subview(folder, fullLocalView, zeros, partialSizes, ones); 177 return PromotionInfo{buffer, fullLocalView, partialLocalView}; 178 } 179 180 static SmallVector<PromotionInfo, 8> 181 promoteSubViews(OpBuilder &b, Location loc, 182 LinalgOpInstancePromotionOptions options, 183 OperationFolder *folder) { 184 if (options.subViews.empty()) 185 return {}; 186 187 ScopedContext scope(b, loc); 188 SmallVector<PromotionInfo, 8> res; 189 res.reserve(options.subViews.size()); 190 DenseMap<Value, PromotionInfo> promotionInfoMap; 191 for (auto v : options.subViews) { 192 SubViewOp subView = cast<SubViewOp>(v.getDefiningOp()); 193 auto promotionInfo = promoteSubviewAsNewBuffer( 194 b, loc, subView, options.dynamicBuffers, options.alignment, folder); 195 promotionInfoMap.insert(std::make_pair(subView.getResult(), promotionInfo)); 196 res.push_back(promotionInfo); 197 } 198 199 for (auto v : options.subViews) { 200 SubViewOp subView = cast<SubViewOp>(v.getDefiningOp()); 201 auto info = promotionInfoMap.find(v); 202 if (info == promotionInfoMap.end()) 203 continue; 204 Value fillVal; 205 if (auto t = subView.getType().getElementType().dyn_cast<FloatType>()) 206 fillVal = folded_std_constant(folder, FloatAttr::get(t, 0.0)); 207 else if (auto t = 208 subView.getType().getElementType().dyn_cast<IntegerType>()) 209 fillVal = folded_std_constant_int(folder, 0, t); 210 // TODO(ntv): fill is only necessary if `promotionInfo` has a full local 211 // view that is different from the partial local view and we are on the 212 // boundary. 213 linalg_fill(info->second.fullLocalView, fillVal); 214 } 215 216 for (auto v : options.subViews) { 217 auto info = promotionInfoMap.find(v); 218 if (info == promotionInfoMap.end()) 219 continue; 220 linalg_copy(cast<SubViewOp>(v.getDefiningOp()), 221 info->second.partialLocalView); 222 } 223 return res; 224 } 225 226 static void promoteSubViews(OpBuilder &b, LinalgOp op, 227 LinalgOpInstancePromotionOptions options, 228 OperationFolder *folder) { 229 assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); 230 231 if (auto convOp = dyn_cast<linalg::ConvOp>(op.getOperation())) { 232 // TODO(ntv): add a level of indirection to linalg.generic. 233 if (convOp.padding()) 234 llvm_unreachable("Unexpected conv with padding"); 235 } 236 237 // 1. Promote the specified views and use them in the new op. 238 auto loc = op.getLoc(); 239 auto promotedBufferAndViews = promoteSubViews(b, loc, options, folder); 240 SmallVector<Value, 8> opViews; 241 opViews.reserve(op.getNumInputsAndOutputs()); 242 SmallVector<std::pair<Value, Value>, 8> writebackViews; 243 writebackViews.reserve(promotedBufferAndViews.size()); 244 unsigned promotedIdx = 0; 245 for (auto view : op.getInputsAndOutputBuffers()) { 246 if (options.subViews.count(view) != 0) { 247 opViews.push_back(promotedBufferAndViews[promotedIdx].fullLocalView); 248 writebackViews.emplace_back(std::make_pair( 249 view, promotedBufferAndViews[promotedIdx].partialLocalView)); 250 promotedIdx++; 251 } else { 252 opViews.push_back(view); 253 } 254 } 255 256 // 2. Append all other operands as they appear, this enforces that such 257 // operands are not views. This is to support cases such as FillOp taking 258 // extra scalars etc. 259 // Keep a reference to output buffers; 260 DenseSet<Value> originalOutputs(op.getOutputBuffers().begin(), 261 op.getOutputBuffers().end()); 262 op.getOperation()->setOperands(0, opViews.size(), opViews); 263 264 OpBuilder::InsertionGuard guard(b); 265 b.setInsertionPointAfter(op); 266 ScopedContext scope(b, loc); 267 // 3. Emit write-back for the promoted output views: copy the partial view. 268 for (auto viewAndPartialLocalView : writebackViews) 269 if (originalOutputs.count(viewAndPartialLocalView.first)) 270 linalg_copy(viewAndPartialLocalView.second, 271 viewAndPartialLocalView.first); 272 273 // 4. Dealloc all local buffers. 274 for (const auto &pi : promotedBufferAndViews) 275 std_dealloc(pi.buffer); 276 } 277 278 LogicalResult 279 mlir::linalg::promoteSubviewsPrecondition(Operation *op, 280 LinalgPromotionOptions options) { 281 LinalgOp linOp = dyn_cast<LinalgOp>(op); 282 // Transformation applies to buffers only. 283 if (!linOp || !linOp.hasBufferSemantics()) 284 return failure(); 285 // Check that at least one of the requested operands is indeed a subview. 286 for (auto en : llvm::enumerate(linOp.getInputsAndOutputBuffers())) { 287 auto sv = isa_and_nonnull<SubViewOp>(en.value().getDefiningOp()); 288 if (sv) { 289 if (!options.operandsToPromote.hasValue() || 290 options.operandsToPromote->count(en.index())) 291 return success(); 292 } 293 } 294 // TODO: Check all subviews requested are bound by a static constant. 295 // TODO: Check that the total footprint fits within a given size. 296 return failure(); 297 } 298 299 LinalgOp mlir::linalg::promoteSubViews(OpBuilder &b, LinalgOp linalgOp, 300 LinalgPromotionOptions options, 301 OperationFolder *folder) { 302 LinalgOpInstancePromotionOptions linalgOptions(linalgOp, options); 303 ::promoteSubViews( 304 b, linalgOp, LinalgOpInstancePromotionOptions(linalgOp, options), folder); 305 return linalgOp; 306 } 307 308 namespace { 309 struct LinalgPromotionPass : public LinalgPromotionBase<LinalgPromotionPass> { 310 LinalgPromotionPass() = default; 311 LinalgPromotionPass(bool dynamicBuffers) { 312 this->dynamicBuffers = dynamicBuffers; 313 } 314 315 void runOnFunction() override { 316 OperationFolder folder(&getContext()); 317 getFunction().walk([this, &folder](LinalgOp op) { 318 auto options = LinalgPromotionOptions().setDynamicBuffers(dynamicBuffers); 319 if (failed(promoteSubviewsPrecondition(op, options))) 320 return; 321 LLVM_DEBUG(llvm::dbgs() << "Promote: " << *(op.getOperation()) << "\n"); 322 OpBuilder b(op); 323 promoteSubViews(b, op, options, &folder); 324 }); 325 } 326 }; 327 } // namespace 328 329 // TODO: support more transformation options in the pass. 330 std::unique_ptr<OperationPass<FuncOp>> 331 mlir::createLinalgPromotionPass(bool dynamicBuffers) { 332 return std::make_unique<LinalgPromotionPass>(dynamicBuffers); 333 } 334 std::unique_ptr<OperationPass<FuncOp>> mlir::createLinalgPromotionPass() { 335 return std::make_unique<LinalgPromotionPass>(); 336 } 337