1 //===- LowerVectorBroadcast.cpp - Lower 'vector.broadcast' operation ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements target-independent rewrites and utilities to lower the 10 // 'vector.broadcast' operation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "mlir/Dialect/Affine/IR/AffineOps.h" 15 #include "mlir/Dialect/Arith/IR/Arith.h" 16 #include "mlir/Dialect/Arith/Utils/Utils.h" 17 #include "mlir/Dialect/Linalg/IR/Linalg.h" 18 #include "mlir/Dialect/MemRef/IR/MemRef.h" 19 #include "mlir/Dialect/SCF/IR/SCF.h" 20 #include "mlir/Dialect/Tensor/IR/Tensor.h" 21 #include "mlir/Dialect/Utils/IndexingUtils.h" 22 #include "mlir/Dialect/Utils/StructuredOpsUtils.h" 23 #include "mlir/Dialect/Vector/IR/VectorOps.h" 24 #include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" 25 #include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h" 26 #include "mlir/Dialect/Vector/Utils/VectorUtils.h" 27 #include "mlir/IR/BuiltinAttributeInterfaces.h" 28 #include "mlir/IR/BuiltinTypes.h" 29 #include "mlir/IR/ImplicitLocOpBuilder.h" 30 #include "mlir/IR/Location.h" 31 #include "mlir/IR/Matchers.h" 32 #include "mlir/IR/PatternMatch.h" 33 #include "mlir/IR/TypeUtilities.h" 34 #include "mlir/Interfaces/VectorInterfaces.h" 35 36 #define DEBUG_TYPE "vector-broadcast-lowering" 37 38 using namespace mlir; 39 using namespace mlir::vector; 40 41 namespace { 42 /// Progressive lowering of BroadcastOp. 43 class BroadcastOpLowering : public OpRewritePattern<vector::BroadcastOp> { 44 public: 45 using OpRewritePattern::OpRewritePattern; 46 47 LogicalResult matchAndRewrite(vector::BroadcastOp op, 48 PatternRewriter &rewriter) const override { 49 auto loc = op.getLoc(); 50 VectorType dstType = op.getResultVectorType(); 51 VectorType srcType = dyn_cast<VectorType>(op.getSourceType()); 52 Type eltType = dstType.getElementType(); 53 54 // Scalar to any vector can use splat. 55 if (!srcType) { 56 rewriter.replaceOpWithNewOp<vector::SplatOp>(op, dstType, op.getSource()); 57 return success(); 58 } 59 60 // Determine rank of source and destination. 61 int64_t srcRank = srcType.getRank(); 62 int64_t dstRank = dstType.getRank(); 63 64 // Stretching scalar inside vector (e.g. vector<1xf32>) can use splat. 65 if (srcRank <= 1 && dstRank == 1) { 66 Value ext; 67 if (srcRank == 0) 68 ext = rewriter.create<vector::ExtractElementOp>(loc, op.getSource()); 69 else 70 ext = rewriter.create<vector::ExtractOp>(loc, op.getSource(), 0); 71 rewriter.replaceOpWithNewOp<vector::SplatOp>(op, dstType, ext); 72 return success(); 73 } 74 75 // Duplicate this rank. 76 // For example: 77 // %x = broadcast %y : k-D to n-D, k < n 78 // becomes: 79 // %b = broadcast %y : k-D to (n-1)-D 80 // %x = [%b,%b,%b,%b] : n-D 81 // becomes: 82 // %b = [%y,%y] : (n-1)-D 83 // %x = [%b,%b,%b,%b] : n-D 84 if (srcRank < dstRank) { 85 // Duplication. 86 VectorType resType = VectorType::Builder(dstType).dropDim(0); 87 Value bcst = 88 rewriter.create<vector::BroadcastOp>(loc, resType, op.getSource()); 89 Value result = rewriter.create<arith::ConstantOp>( 90 loc, dstType, rewriter.getZeroAttr(dstType)); 91 for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) 92 result = rewriter.create<vector::InsertOp>(loc, bcst, result, d); 93 rewriter.replaceOp(op, result); 94 return success(); 95 } 96 97 // Find non-matching dimension, if any. 98 assert(srcRank == dstRank); 99 int64_t m = -1; 100 for (int64_t r = 0; r < dstRank; r++) 101 if (srcType.getDimSize(r) != dstType.getDimSize(r)) { 102 m = r; 103 break; 104 } 105 106 // All trailing dimensions are the same. Simply pass through. 107 if (m == -1) { 108 rewriter.replaceOp(op, op.getSource()); 109 return success(); 110 } 111 112 // Any non-matching dimension forces a stretch along this rank. 113 // For example: 114 // %x = broadcast %y : vector<4x1x2xf32> to vector<4x2x2xf32> 115 // becomes: 116 // %a = broadcast %y[0] : vector<1x2xf32> to vector<2x2xf32> 117 // %b = broadcast %y[1] : vector<1x2xf32> to vector<2x2xf32> 118 // %c = broadcast %y[2] : vector<1x2xf32> to vector<2x2xf32> 119 // %d = broadcast %y[3] : vector<1x2xf32> to vector<2x2xf32> 120 // %x = [%a,%b,%c,%d] 121 // becomes: 122 // %u = broadcast %y[0][0] : vector<2xf32> to vector <2x2xf32> 123 // %v = broadcast %y[1][0] : vector<2xf32> to vector <2x2xf32> 124 // %a = [%u, %v] 125 // .. 126 // %x = [%a,%b,%c,%d] 127 VectorType resType = 128 VectorType::get(dstType.getShape().drop_front(), eltType, 129 dstType.getScalableDims().drop_front()); 130 Value result = rewriter.create<arith::ConstantOp>( 131 loc, dstType, rewriter.getZeroAttr(dstType)); 132 if (m == 0) { 133 // Stetch at start. 134 Value ext = rewriter.create<vector::ExtractOp>(loc, op.getSource(), 0); 135 Value bcst = rewriter.create<vector::BroadcastOp>(loc, resType, ext); 136 for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) 137 result = rewriter.create<vector::InsertOp>(loc, bcst, result, d); 138 } else { 139 // Stetch not at start. 140 if (dstType.getScalableDims()[0]) { 141 // TODO: For scalable vectors we should emit an scf.for loop. 142 return failure(); 143 } 144 for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) { 145 Value ext = rewriter.create<vector::ExtractOp>(loc, op.getSource(), d); 146 Value bcst = rewriter.create<vector::BroadcastOp>(loc, resType, ext); 147 result = rewriter.create<vector::InsertOp>(loc, bcst, result, d); 148 } 149 } 150 rewriter.replaceOp(op, result); 151 return success(); 152 } 153 }; 154 } // namespace 155 156 void mlir::vector::populateVectorBroadcastLoweringPatterns( 157 RewritePatternSet &patterns, PatternBenefit benefit) { 158 patterns.add<BroadcastOpLowering>(patterns.getContext(), benefit); 159 } 160