xref: /llvm-project/mlir/lib/Dialect/Vector/Transforms/LowerVectorScan.cpp (revision 2bc4c3e920ee078ef2879b00c40440e0867f0b9e)
1 //===- LowerVectorScan.cpp - Lower 'vector.scan' operation ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements target-independent rewrites and utilities to lower the
10 // 'vector.scan' operation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "mlir/Dialect/Affine/IR/AffineOps.h"
15 #include "mlir/Dialect/Arith/IR/Arith.h"
16 #include "mlir/Dialect/Arith/Utils/Utils.h"
17 #include "mlir/Dialect/Linalg/IR/Linalg.h"
18 #include "mlir/Dialect/MemRef/IR/MemRef.h"
19 #include "mlir/Dialect/SCF/IR/SCF.h"
20 #include "mlir/Dialect/Tensor/IR/Tensor.h"
21 #include "mlir/Dialect/Utils/IndexingUtils.h"
22 #include "mlir/Dialect/Utils/StructuredOpsUtils.h"
23 #include "mlir/Dialect/Vector/IR/VectorOps.h"
24 #include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
25 #include "mlir/Dialect/Vector/Utils/VectorUtils.h"
26 #include "mlir/IR/BuiltinAttributeInterfaces.h"
27 #include "mlir/IR/BuiltinTypes.h"
28 #include "mlir/IR/ImplicitLocOpBuilder.h"
29 #include "mlir/IR/Location.h"
30 #include "mlir/IR/Matchers.h"
31 #include "mlir/IR/PatternMatch.h"
32 #include "mlir/IR/TypeUtilities.h"
33 #include "mlir/Interfaces/VectorInterfaces.h"
34 #include "mlir/Support/LogicalResult.h"
35 
// Debug tag for this translation unit. It names the scan lowering implemented
// here rather than the broadcast lowering it was originally copied from.
// NOTE(review): no use of DEBUG_TYPE is visible in this file; by LLVM
// convention it is the tag consumed by LLVM_DEBUG / -debug-only.
#define DEBUG_TYPE "vector-scan-lowering"
37 
38 using namespace mlir;
39 using namespace mlir::vector;
40 
41 /// This function constructs the appropriate integer or float
42 /// operation given the vector combining kind and operands. The
43 /// supported int operations are : add, mul, min (signed/unsigned),
44 /// max(signed/unsigned), and, or, xor. The supported float
45 /// operations are : add, mul, min and max.
46 static Value genOperator(Location loc, Value x, Value y,
47                          vector::CombiningKind kind,
48                          PatternRewriter &rewriter) {
49   using vector::CombiningKind;
50 
51   auto elType = x.getType().cast<VectorType>().getElementType();
52   bool isInt = elType.isIntOrIndex();
53 
54   Value combinedResult{nullptr};
55   switch (kind) {
56   case CombiningKind::ADD:
57     if (isInt)
58       combinedResult = rewriter.create<arith::AddIOp>(loc, x, y);
59     else
60       combinedResult = rewriter.create<arith::AddFOp>(loc, x, y);
61     break;
62   case CombiningKind::MUL:
63     if (isInt)
64       combinedResult = rewriter.create<arith::MulIOp>(loc, x, y);
65     else
66       combinedResult = rewriter.create<arith::MulFOp>(loc, x, y);
67     break;
68   case CombiningKind::MINUI:
69     combinedResult = rewriter.create<arith::MinUIOp>(loc, x, y);
70     break;
71   case CombiningKind::MINSI:
72     combinedResult = rewriter.create<arith::MinSIOp>(loc, x, y);
73     break;
74   case CombiningKind::MAXUI:
75     combinedResult = rewriter.create<arith::MaxUIOp>(loc, x, y);
76     break;
77   case CombiningKind::MAXSI:
78     combinedResult = rewriter.create<arith::MaxSIOp>(loc, x, y);
79     break;
80   case CombiningKind::AND:
81     combinedResult = rewriter.create<arith::AndIOp>(loc, x, y);
82     break;
83   case CombiningKind::OR:
84     combinedResult = rewriter.create<arith::OrIOp>(loc, x, y);
85     break;
86   case CombiningKind::XOR:
87     combinedResult = rewriter.create<arith::XOrIOp>(loc, x, y);
88     break;
89   case CombiningKind::MINF:
90     combinedResult = rewriter.create<arith::MinFOp>(loc, x, y);
91     break;
92   case CombiningKind::MAXF:
93     combinedResult = rewriter.create<arith::MaxFOp>(loc, x, y);
94     break;
95   }
96   return combinedResult;
97 }
98 
99 /// This function checks to see if the vector combining kind
100 /// is consistent with the integer or float element type.
101 static bool isValidKind(bool isInt, vector::CombiningKind kind) {
102   using vector::CombiningKind;
103   enum class KindType { FLOAT, INT, INVALID };
104   KindType type{KindType::INVALID};
105   switch (kind) {
106   case CombiningKind::MINF:
107   case CombiningKind::MAXF:
108     type = KindType::FLOAT;
109     break;
110   case CombiningKind::MINUI:
111   case CombiningKind::MINSI:
112   case CombiningKind::MAXUI:
113   case CombiningKind::MAXSI:
114   case CombiningKind::AND:
115   case CombiningKind::OR:
116   case CombiningKind::XOR:
117     type = KindType::INT;
118     break;
119   case CombiningKind::ADD:
120   case CombiningKind::MUL:
121     type = isInt ? KindType::INT : KindType::FLOAT;
122     break;
123   }
124   bool isValidIntKind = (type == KindType::INT) && isInt;
125   bool isValidFloatKind = (type == KindType::FLOAT) && (!isInt);
126   return (isValidIntKind || isValidFloatKind);
127 }
128 
129 namespace {
130 /// Convert vector.scan op into arith ops and vector.insert_strided_slice /
131 /// vector.extract_strided_slice.
132 ///
133 /// Example:
134 ///
135 /// ```
136 ///   %0:2 = vector.scan <add>, %arg0, %arg1
137 ///     {inclusive = true, reduction_dim = 1} :
138 ///     (vector<2x3xi32>, vector<2xi32>) to (vector<2x3xi32>, vector<2xi32>)
139 /// ```
140 ///
141 /// is converted to:
142 ///
143 /// ```
144 ///   %cst = arith.constant dense<0> : vector<2x3xi32>
145 ///   %0 = vector.extract_strided_slice %arg0
146 ///     {offsets = [0, 0], sizes = [2, 1], strides = [1, 1]}
147 ///       : vector<2x3xi32> to vector<2x1xi32>
148 ///   %1 = vector.insert_strided_slice %0, %cst
149 ///     {offsets = [0, 0], strides = [1, 1]}
150 ///       : vector<2x1xi32> into vector<2x3xi32>
151 ///   %2 = vector.extract_strided_slice %arg0
152 ///     {offsets = [0, 1], sizes = [2, 1], strides = [1, 1]}
153 ///       : vector<2x3xi32> to vector<2x1xi32>
154 ///   %3 = arith.muli %0, %2 : vector<2x1xi32>
155 ///   %4 = vector.insert_strided_slice %3, %1
156 ///     {offsets = [0, 1], strides = [1, 1]}
157 ///       : vector<2x1xi32> into vector<2x3xi32>
158 ///   %5 = vector.extract_strided_slice %arg0
159 ///     {offsets = [0, 2], sizes = [2, 1], strides = [1, 1]}
160 ///       : vector<2x3xi32> to vector<2x1xi32>
161 ///   %6 = arith.muli %3, %5 : vector<2x1xi32>
162 ///   %7 = vector.insert_strided_slice %6, %4
163 ///     {offsets = [0, 2], strides = [1, 1]}
164 ///       : vector<2x1xi32> into vector<2x3xi32>
165 ///   %8 = vector.shape_cast %6 : vector<2x1xi32> to vector<2xi32>
166 ///   return %7, %8 : vector<2x3xi32>, vector<2xi32>
167 /// ```
168 struct ScanToArithOps : public OpRewritePattern<vector::ScanOp> {
169   using OpRewritePattern::OpRewritePattern;
170 
171   LogicalResult matchAndRewrite(vector::ScanOp scanOp,
172                                 PatternRewriter &rewriter) const override {
173     auto loc = scanOp.getLoc();
174     VectorType destType = scanOp.getDestType();
175     ArrayRef<int64_t> destShape = destType.getShape();
176     auto elType = destType.getElementType();
177     bool isInt = elType.isIntOrIndex();
178     if (!isValidKind(isInt, scanOp.getKind()))
179       return failure();
180 
181     VectorType resType = VectorType::get(destShape, elType);
182     Value result = rewriter.create<arith::ConstantOp>(
183         loc, resType, rewriter.getZeroAttr(resType));
184     int64_t reductionDim = scanOp.getReductionDim();
185     bool inclusive = scanOp.getInclusive();
186     int64_t destRank = destType.getRank();
187     VectorType initialValueType = scanOp.getInitialValueType();
188     int64_t initialValueRank = initialValueType.getRank();
189 
190     SmallVector<int64_t> reductionShape(destShape.begin(), destShape.end());
191     reductionShape[reductionDim] = 1;
192     VectorType reductionType = VectorType::get(reductionShape, elType);
193     SmallVector<int64_t> offsets(destRank, 0);
194     SmallVector<int64_t> strides(destRank, 1);
195     SmallVector<int64_t> sizes(destShape.begin(), destShape.end());
196     sizes[reductionDim] = 1;
197     ArrayAttr scanSizes = rewriter.getI64ArrayAttr(sizes);
198     ArrayAttr scanStrides = rewriter.getI64ArrayAttr(strides);
199 
200     Value lastOutput, lastInput;
201     for (int i = 0; i < destShape[reductionDim]; i++) {
202       offsets[reductionDim] = i;
203       ArrayAttr scanOffsets = rewriter.getI64ArrayAttr(offsets);
204       Value input = rewriter.create<vector::ExtractStridedSliceOp>(
205           loc, reductionType, scanOp.getSource(), scanOffsets, scanSizes,
206           scanStrides);
207       Value output;
208       if (i == 0) {
209         if (inclusive) {
210           output = input;
211         } else {
212           if (initialValueRank == 0) {
213             // ShapeCastOp cannot handle 0-D vectors
214             output = rewriter.create<vector::BroadcastOp>(
215                 loc, input.getType(), scanOp.getInitialValue());
216           } else {
217             output = rewriter.create<vector::ShapeCastOp>(
218                 loc, input.getType(), scanOp.getInitialValue());
219           }
220         }
221       } else {
222         Value y = inclusive ? input : lastInput;
223         output = genOperator(loc, lastOutput, y, scanOp.getKind(), rewriter);
224         assert(output != nullptr);
225       }
226       result = rewriter.create<vector::InsertStridedSliceOp>(
227           loc, output, result, offsets, strides);
228       lastOutput = output;
229       lastInput = input;
230     }
231 
232     Value reduction;
233     if (initialValueRank == 0) {
234       Value v = rewriter.create<vector::ExtractOp>(loc, lastOutput, 0);
235       reduction =
236           rewriter.create<vector::BroadcastOp>(loc, initialValueType, v);
237     } else {
238       reduction = rewriter.create<vector::ShapeCastOp>(loc, initialValueType,
239                                                        lastOutput);
240     }
241 
242     rewriter.replaceOp(scanOp, {result, reduction});
243     return success();
244   }
245 };
246 } // namespace
247 
248 void mlir::vector::populateVectorScanLoweringPatterns(
249     RewritePatternSet &patterns, PatternBenefit benefit) {
250   patterns.add<ScanToArithOps>(patterns.getContext(), benefit);
251 }
252