1 //===- Promotion.cpp - Implementation of linalg Promotion -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the linalg dialect Promotion pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PassDetail.h" 14 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" 15 #include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" 16 #include "mlir/Dialect/Linalg/IR/LinalgOps.h" 17 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" 18 #include "mlir/Dialect/Linalg/Passes.h" 19 #include "mlir/Dialect/Linalg/Utils/Utils.h" 20 #include "mlir/Dialect/LoopOps/LoopOps.h" 21 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" 22 #include "mlir/IR/AffineExpr.h" 23 #include "mlir/IR/AffineExprVisitor.h" 24 #include "mlir/IR/AffineMap.h" 25 #include "mlir/Support/LLVM.h" 26 #include "mlir/Support/STLExtras.h" 27 #include "mlir/Transforms/FoldUtils.h" 28 29 #include "llvm/ADT/SetVector.h" 30 #include "llvm/Support/CommandLine.h" 31 32 using namespace mlir; 33 using namespace mlir::edsc; 34 using namespace mlir::edsc::intrinsics; 35 using namespace mlir::linalg; 36 using namespace mlir::loop; 37 38 using llvm::SetVector; 39 40 using folded_affine_min = folded::ValueBuilder<AffineMinOp>; 41 using folded_linalg_range = folded::ValueBuilder<linalg::RangeOp>; 42 43 #define DEBUG_TYPE "linalg-promotion" 44 45 static Value allocBuffer(Type elementType, Value size, bool dynamicBuffers) { 46 auto *ctx = size.getContext(); 47 auto width = llvm::divideCeil(elementType.getIntOrFloatBitWidth(), 8); 48 if (!dynamicBuffers) 49 if (auto cst = dyn_cast_or_null<ConstantIndexOp>(size.getDefiningOp())) 50 return std_alloc( 51 MemRefType::get(width * cst.getValue(), IntegerType::get(8, ctx))); 52 Value mul = std_muli(std_constant_index(width), size); 53 return std_alloc(MemRefType::get(-1, IntegerType::get(8, ctx)), mul); 54 } 55 56 // Performs promotion of a `subView` into a local buffer of the size of the 57 // *ranges* of the `subView`. This produces a buffer whose size may be bigger 58 // than the actual size of the `subView` at the boundaries. 59 // This is related to the full/partial tile problem. 60 // Returns a PromotionInfo containing a `buffer`, `fullLocalView` and 61 // `partialLocalView` such that: 62 // * `buffer` is always the size of the full tile. 63 // * `fullLocalView` is a dense contiguous view into that buffer. 64 // * `partialLocalView` is a dense non-contiguous slice of `fullLocalView` 65 // that corresponds to the size of `subView` and accounting for boundary 66 // effects. 67 // The point of the full tile buffer is that constant static tile sizes are 68 // folded and result in a buffer type with statically known size and alignment 69 // properties. 70 // To account for general boundary effects, padding must be performed on the 71 // boundary tiles. For now this is done with an unconditional `fill` op followed 72 // by a partial `copy` op. 73 static PromotionInfo promoteFullTileBuffer(OpBuilder &b, Location loc, 74 SubViewOp subView, 75 bool dynamicBuffers, 76 OperationFolder *folder) { 77 auto zero = folded_std_constant_index(folder, 0); 78 auto one = folded_std_constant_index(folder, 1); 79 80 auto viewType = subView.getType(); 81 auto rank = viewType.getRank(); 82 Value allocSize = one; 83 SmallVector<Value, 8> fullRanges, partialRanges; 84 fullRanges.reserve(rank); 85 partialRanges.reserve(rank); 86 for (auto en : llvm::enumerate(subView.getRanges())) { 87 auto rank = en.index(); 88 auto rangeValue = en.value(); 89 Value d = rangeValue.size; 90 allocSize = folded_std_muli(folder, allocSize, d).getValue(); 91 fullRanges.push_back(d); 92 partialRanges.push_back( 93 folded_linalg_range(folder, zero, std_dim(subView, rank), one)); 94 } 95 SmallVector<int64_t, 4> dynSizes(fullRanges.size(), -1); 96 auto buffer = 97 allocBuffer(viewType.getElementType(), allocSize, dynamicBuffers); 98 auto fullLocalView = std_view( 99 MemRefType::get(dynSizes, viewType.getElementType()), buffer, fullRanges); 100 auto partialLocalView = linalg_slice(fullLocalView, partialRanges); 101 return PromotionInfo{buffer, fullLocalView, partialLocalView}; 102 } 103 104 SmallVector<PromotionInfo, 8> 105 mlir::linalg::promoteSubViews(OpBuilder &b, Location loc, 106 ArrayRef<Value> subViews, bool dynamicBuffers, 107 OperationFolder *folder) { 108 if (subViews.empty()) 109 return {}; 110 111 ScopedContext scope(b, loc); 112 SmallVector<PromotionInfo, 8> res; 113 res.reserve(subViews.size()); 114 DenseMap<Value, PromotionInfo> promotionInfoMap; 115 for (auto v : subViews) { 116 SubViewOp subView = cast<SubViewOp>(v.getDefiningOp()); 117 auto promotionInfo = 118 promoteFullTileBuffer(b, loc, subView, dynamicBuffers, folder); 119 promotionInfoMap.insert(std::make_pair(subView.getResult(), promotionInfo)); 120 res.push_back(promotionInfo); 121 } 122 123 for (auto v : subViews) { 124 SubViewOp subView = cast<SubViewOp>(v.getDefiningOp()); 125 auto info = promotionInfoMap.find(v); 126 if (info == promotionInfoMap.end()) 127 continue; 128 Value fillVal; 129 if (auto t = subView.getType().getElementType().dyn_cast<FloatType>()) 130 fillVal = folded_std_constant(folder, FloatAttr::get(t, 0.0)); 131 else if (auto t = 132 subView.getType().getElementType().dyn_cast<IntegerType>()) 133 fillVal = folded_std_constant_int(folder, 0, t); 134 // TODO(ntv): fill is only necessary if `promotionInfo` has a full local 135 // view that is different from the partial local view and we are on the 136 // boundary. 137 linalg_fill(info->second.fullLocalView, fillVal); 138 } 139 140 for (auto v : subViews) { 141 auto info = promotionInfoMap.find(v); 142 if (info == promotionInfoMap.end()) 143 continue; 144 linalg_copy(cast<SubViewOp>(v.getDefiningOp()), 145 info->second.partialLocalView); 146 } 147 return res; 148 } 149 150 LinalgOp mlir::linalg::promoteSubViewOperands(OpBuilder &b, LinalgOp op, 151 SetVector<Value> subViews, 152 bool dynamicBuffers, 153 OperationFolder *folder) { 154 assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); 155 156 if (auto convOp = dyn_cast<linalg::ConvOp>(op.getOperation())) { 157 // TODO(ntv): add a level of indirection to linalg.generic. 158 if (convOp.padding()) 159 llvm_unreachable("Unexpected conv with padding"); 160 } 161 162 // 1. Promote the specified views and use them in the new op. 163 ScopedContext scope(b, op.getLoc()); 164 auto promotedBufferAndViews = promoteSubViews( 165 b, op.getLoc(), subViews.getArrayRef(), dynamicBuffers, folder); 166 SmallVector<Value, 8> opViews; 167 opViews.reserve(op.getNumInputsAndOutputs()); 168 SmallVector<std::pair<Value, Value>, 8> writebackViews; 169 writebackViews.reserve(subViews.size()); 170 unsigned promotedIdx = 0; 171 for (auto view : op.getInputsAndOutputBuffers()) { 172 if (subViews.count(view) != 0) { 173 opViews.push_back(promotedBufferAndViews[promotedIdx].fullLocalView); 174 writebackViews.emplace_back(std::make_pair( 175 view, promotedBufferAndViews[promotedIdx].partialLocalView)); 176 promotedIdx++; 177 } else { 178 opViews.push_back(view); 179 } 180 } 181 182 // 2. Append all other operands as they appear, this enforces that such 183 // operands are not views. This is to support cases such as FillOp taking 184 // extra scalars etc. 185 auto operands = getAssumedNonViewOperands(op); 186 opViews.append(operands.begin(), operands.end()); 187 LinalgOp res = op.clone(b, op.getLoc(), opViews); 188 189 // 3. Emit write-back for the promoted output views: copy the partial view. 190 for (auto viewAndPartialLocalView : writebackViews) { 191 // WARNING: MUST use the old op to determine whether the operand view is an 192 // output. 193 bool isOutput = 194 op.getIndexOfOutputBuffer(viewAndPartialLocalView.first).hasValue(); 195 if (isOutput) 196 linalg_copy(viewAndPartialLocalView.second, 197 viewAndPartialLocalView.first); 198 } 199 200 // 4. Dealloc local buffers. 201 for (const auto &pi : promotedBufferAndViews) 202 std_dealloc(pi.buffer); 203 204 return res; 205 } 206 207 static void promoteSubViews(FuncOp f, bool dynamicBuffers) { 208 SmallVector<LinalgOp, 8> toErase; 209 OperationFolder folder(f.getContext()); 210 f.walk([dynamicBuffers, &folder, &toErase](LinalgOp op) { 211 if (!op.hasBufferSemantics()) 212 return; 213 214 // TODO(ntv) some heuristic here to decide what to promote. Atm only float 215 // and integer buffers can be promoted. 216 SetVector<Value> subViews; 217 OpBuilder b(op); 218 for (auto it : op.getInputsAndOutputBuffers()) 219 if (auto sv = dyn_cast_or_null<SubViewOp>(it.getDefiningOp())) 220 if (sv.getType().getElementType().isSignlessIntOrFloat()) 221 subViews.insert(sv); 222 if (!subViews.empty()) { 223 promoteSubViewOperands(b, op, subViews, dynamicBuffers, &folder); 224 toErase.push_back(op); 225 } 226 }); 227 for (auto op : toErase) 228 op.erase(); 229 } 230 231 namespace { 232 struct LinalgPromotionPass : public LinalgPromotionBase<LinalgPromotionPass> { 233 LinalgPromotionPass() = default; 234 LinalgPromotionPass(bool dynamicBuffers) { 235 this->dynamicBuffers = dynamicBuffers; 236 } 237 238 void runOnFunction() override { 239 promoteSubViews(getFunction(), dynamicBuffers); 240 } 241 }; 242 } // namespace 243 244 std::unique_ptr<OperationPass<FuncOp>> 245 mlir::createLinalgPromotionPass(bool dynamicBuffers) { 246 return std::make_unique<LinalgPromotionPass>(dynamicBuffers); 247 } 248 std::unique_ptr<OperationPass<FuncOp>> mlir::createLinalgPromotionPass() { 249 return std::make_unique<LinalgPromotionPass>(); 250 } 251