1 //===- Promotion.cpp - Implementation of linalg Promotion -----------------===// 2 // 3 // Copyright 2019 The MLIR Authors. 4 // 5 // Licensed under the Apache License, Version 2.0 (the "License"); 6 // you may not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 // ============================================================================= 17 // 18 // This file implements the linalg dialect Promotion pass. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "mlir/Dialect/Linalg/IR/LinalgOps.h" 23 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" 24 #include "mlir/Dialect/Linalg/Passes.h" 25 #include "mlir/Dialect/Linalg/Utils/Intrinsics.h" 26 #include "mlir/Dialect/Linalg/Utils/Utils.h" 27 #include "mlir/Dialect/LoopOps/LoopOps.h" 28 #include "mlir/EDSC/Helpers.h" 29 #include "mlir/IR/AffineExpr.h" 30 #include "mlir/IR/AffineExprVisitor.h" 31 #include "mlir/IR/AffineMap.h" 32 #include "mlir/IR/OpImplementation.h" 33 #include "mlir/Pass/Pass.h" 34 #include "mlir/Support/LLVM.h" 35 #include "mlir/Support/STLExtras.h" 36 #include "mlir/Transforms/FoldUtils.h" 37 38 #include "llvm/ADT/SetVector.h" 39 #include "llvm/Support/CommandLine.h" 40 41 using namespace mlir; 42 using namespace mlir::edsc; 43 using namespace mlir::edsc::intrinsics; 44 using namespace mlir::linalg; 45 using namespace mlir::linalg::intrinsics; 46 using namespace mlir::loop; 47 48 using llvm::SetVector; 49 50 #define DEBUG_TYPE "linalg-promotion" 51 52 static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options"); 53 static llvm::cl::opt<bool> clPromoteDynamic( 54 "test-linalg-promote-dynamic", 55 llvm::cl::desc("Test generation of dynamic promoted buffers"), 56 llvm::cl::cat(clOptionsCategory), llvm::cl::init(false)); 57 58 static ValuePtr allocBuffer(Type elementType, ValuePtr size, 59 bool dynamicBuffers) { 60 auto *ctx = size->getContext(); 61 auto width = llvm::divideCeil(elementType.getIntOrFloatBitWidth(), 8); 62 if (!dynamicBuffers) 63 if (auto cst = dyn_cast_or_null<ConstantIndexOp>(size->getDefiningOp())) 64 return alloc( 65 MemRefType::get(width * cst.getValue(), IntegerType::get(8, ctx))); 66 ValuePtr mul = muli(constant_index(width), size); 67 return alloc(MemRefType::get(-1, IntegerType::get(8, ctx)), mul); 68 } 69 70 // Performs promotion of a `subView` into a local buffer of the size of the 71 // *ranges* of the `subView`. This produces a buffer whose size may be bigger 72 // than the actual size of the `subView` at the boundaries. 73 // This is related to the full/partial tile problem. 74 // Returns a PromotionInfo containing a `buffer`, `fullLocalView` and 75 // `partialLocalView` such that: 76 // * `buffer` is always the size of the full tile. 77 // * `fullLocalView` is a dense contiguous view into that buffer. 78 // * `partialLocalView` is a dense non-contiguous slice of `fullLocalView` 79 // that corresponds to the size of `subView` and accounting for boundary 80 // effects. 81 // The point of the full tile buffer is that constant static tile sizes are 82 // folded and result in a buffer type with statically known size and alignment 83 // properties. 84 // To account for general boundary effects, padding must be performed on the 85 // boundary tiles. For now this is done with an unconditional `fill` op followed 86 // by a partial `copy` op. 87 static PromotionInfo promoteFullTileBuffer(OpBuilder &b, Location loc, 88 SubViewOp subView, 89 bool dynamicBuffers, 90 OperationFolder *folder) { 91 auto zero = constant_index(folder, 0); 92 auto one = constant_index(folder, 1); 93 94 auto viewType = subView.getType(); 95 auto rank = viewType.getRank(); 96 ValuePtr allocSize = one; 97 SmallVector<ValuePtr, 8> fullRanges, partialRanges; 98 fullRanges.reserve(rank); 99 partialRanges.reserve(rank); 100 for (auto en : llvm::enumerate(subView.getRanges())) { 101 auto rank = en.index(); 102 auto rangeValue = en.value(); 103 ValuePtr d = rangeValue.size; 104 allocSize = muli(folder, allocSize, d).getValue(); 105 fullRanges.push_back(d); 106 partialRanges.push_back(range(folder, zero, dim(subView, rank), one)); 107 } 108 SmallVector<int64_t, 4> dynSizes(fullRanges.size(), -1); 109 auto buffer = 110 allocBuffer(viewType.getElementType(), allocSize, dynamicBuffers); 111 auto fullLocalView = view( 112 MemRefType::get(dynSizes, viewType.getElementType()), buffer, fullRanges); 113 auto partialLocalView = slice(fullLocalView, partialRanges); 114 return PromotionInfo{buffer, fullLocalView, partialLocalView}; 115 } 116 117 SmallVector<PromotionInfo, 8> 118 mlir::linalg::promoteSubViews(OpBuilder &b, Location loc, 119 ArrayRef<ValuePtr> subViews, bool dynamicBuffers, 120 OperationFolder *folder) { 121 if (subViews.empty()) 122 return {}; 123 124 ScopedContext scope(b, loc); 125 SmallVector<PromotionInfo, 8> res; 126 res.reserve(subViews.size()); 127 DenseMap<ValuePtr, PromotionInfo> promotionInfoMap; 128 for (auto v : subViews) { 129 SubViewOp subView = cast<SubViewOp>(v->getDefiningOp()); 130 auto viewType = subView.getType(); 131 // TODO(ntv): support more cases than just float. 132 if (!viewType.getElementType().isa<FloatType>()) 133 continue; 134 auto promotionInfo = 135 promoteFullTileBuffer(b, loc, subView, dynamicBuffers, folder); 136 promotionInfoMap.insert(std::make_pair(subView.getResult(), promotionInfo)); 137 res.push_back(promotionInfo); 138 } 139 140 for (auto v : subViews) { 141 SubViewOp subView = cast<SubViewOp>(v->getDefiningOp()); 142 auto info = promotionInfoMap.find(v); 143 if (info == promotionInfoMap.end()) 144 continue; 145 // TODO(ntv): value to fill with should be related to the operation. 146 // For now, just use APFloat(0.0f). 147 auto t = subView.getType().getElementType().cast<FloatType>(); 148 ValuePtr fillVal = constant_float(folder, APFloat(0.0f), t); 149 // TODO(ntv): fill is only necessary if `promotionInfo` has a full local 150 // view that is different from the partial local view and we are on the 151 // boundary. 152 fill(info->second.fullLocalView, fillVal); 153 } 154 155 for (auto v : subViews) { 156 auto info = promotionInfoMap.find(v); 157 if (info == promotionInfoMap.end()) 158 continue; 159 copy(cast<SubViewOp>(v->getDefiningOp()), info->second.partialLocalView); 160 } 161 return res; 162 } 163 164 LinalgOp mlir::linalg::promoteSubViewOperands(OpBuilder &b, LinalgOp op, 165 SetVector<ValuePtr> subViews, 166 bool dynamicBuffers, 167 OperationFolder *folder) { 168 // 1. Promote the specified views and use them in the new op. 169 ScopedContext scope(b, op.getLoc()); 170 auto promotedBufferAndViews = promoteSubViews( 171 b, op.getLoc(), subViews.getArrayRef(), dynamicBuffers, folder); 172 SmallVector<ValuePtr, 8> opViews; 173 opViews.reserve(op.getNumInputsAndOutputs()); 174 SmallVector<std::pair<ValuePtr, ValuePtr>, 8> writebackViews; 175 writebackViews.reserve(subViews.size()); 176 unsigned promotedIdx = 0; 177 for (auto view : op.getInputsAndOutputs()) { 178 if (subViews.count(view) != 0) { 179 opViews.push_back(promotedBufferAndViews[promotedIdx].fullLocalView); 180 writebackViews.emplace_back(std::make_pair( 181 view, promotedBufferAndViews[promotedIdx].partialLocalView)); 182 promotedIdx++; 183 } else { 184 opViews.push_back(view); 185 } 186 } 187 188 // 2. Append all other operands as they appear, this enforces that such 189 // operands are not views. This is to support cases such as FillOp taking 190 // extra scalars etc. 191 auto operands = getAssumedNonViewOperands(op); 192 opViews.append(operands.begin(), operands.end()); 193 LinalgOp res = op.clone(b, op.getLoc(), opViews); 194 195 // 3. Emit write-back for the promoted output views: copy the partial view. 196 for (auto viewAndPartialLocalView : writebackViews) { 197 // WARNING: MUST use the old op to determine whether the operand view is an 198 // output. 199 bool isOutput = 200 op.getIndexOfOutput(viewAndPartialLocalView.first).hasValue(); 201 if (isOutput) 202 copy(viewAndPartialLocalView.second, viewAndPartialLocalView.first); 203 } 204 205 // 4. Dealloc local buffers. 206 for (const auto &pi : promotedBufferAndViews) 207 dealloc(pi.buffer); 208 209 return res; 210 } 211 212 static void promoteSubViews(FuncOp f, bool dynamicBuffers) { 213 SmallVector<LinalgOp, 8> toErase; 214 OperationFolder folder(f.getContext()); 215 f.walk([dynamicBuffers, &folder, &toErase](LinalgOp op) { 216 // TODO(ntv) some heuristic here to decide what to promote. Atm it is all or 217 // nothing. 218 SetVector<ValuePtr> subViews; 219 OpBuilder b(op); 220 for (auto it : op.getInputsAndOutputs()) 221 if (auto sv = dyn_cast_or_null<SubViewOp>(it->getDefiningOp())) 222 subViews.insert(sv); 223 if (!subViews.empty()) { 224 promoteSubViewOperands(b, op, subViews, dynamicBuffers, &folder); 225 toErase.push_back(op); 226 } 227 }); 228 for (auto op : toErase) 229 op.erase(); 230 } 231 232 namespace { 233 struct LinalgPromotionPass : public FunctionPass<LinalgPromotionPass> { 234 LinalgPromotionPass() = default; 235 LinalgPromotionPass(bool dynamicBuffers) : dynamicBuffers(dynamicBuffers) {} 236 237 void runOnFunction() override { 238 promoteSubViews(getFunction(), dynamicBuffers); 239 } 240 241 bool dynamicBuffers; 242 }; 243 } // namespace 244 245 std::unique_ptr<OpPassBase<FuncOp>> 246 mlir::linalg::createLinalgPromotionPass(bool dynamicBuffers) { 247 return std::make_unique<LinalgPromotionPass>(dynamicBuffers); 248 } 249 250 static PassRegistration<LinalgPromotionPass> 251 pass("linalg-promote-subviews", "promote subview ops to local buffers", [] { 252 return std::make_unique<LinalgPromotionPass>(clPromoteDynamic); 253 }); 254