xref: /llvm-project/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td (revision aa2952165cd1808dab2bb49b97becc097f4c9cac)
1//===- VectorOps.td - Vector op definitions ---------------*- tablegen -*-====//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Defines MLIR vector operations.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef MLIR_DIALECT_VECTOR_IR_VECTOR_OPS
14#define MLIR_DIALECT_VECTOR_IR_VECTOR_OPS
15
16include "mlir/Dialect/Arith/IR/ArithBase.td"
17include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
18include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.td"
19include "mlir/Dialect/Vector/Interfaces/MaskingOpInterface.td"
20include "mlir/Dialect/Vector/IR/Vector.td"
21include "mlir/Dialect/Vector/IR/VectorAttributes.td"
22include "mlir/Interfaces/ControlFlowInterfaces.td"
23include "mlir/Interfaces/DestinationStyleOpInterface.td"
24include "mlir/Interfaces/InferIntRangeInterface.td"
25include "mlir/Interfaces/InferTypeOpInterface.td"
26include "mlir/Interfaces/SideEffectInterfaces.td"
27include "mlir/Interfaces/VectorInterfaces.td"
28include "mlir/Interfaces/ViewLikeInterface.td"
29include "mlir/IR/BuiltinAttributes.td"
30include "mlir/IR/EnumAttr.td"
31
// TODO: Add an attribute to specify a different algebra with operators other
// than the current set: {*, +}.
def Vector_ContractionOp :
  Vector_Op<"contract", [
      Pure,
      PredOpTrait<"lhs and rhs have same element type", TCopVTEtIsSameAs<0, 1>>,
      PredOpTrait<"third operand acc and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 2>>,
      DeclareOpInterfaceMethods<MaskableOpInterface>,
      DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
    ]>,
    Arguments<(ins AnyVectorOfNonZeroRank:$lhs, AnyVectorOfNonZeroRank:$rhs, AnyType:$acc,
               ArrayAttr:$indexing_maps,
               Vector_IteratorTypeArrayAttr:$iterator_types,
               DefaultValuedAttr<Vector_CombiningKindAttr,
                                 "CombiningKind::ADD">:$kind)>,
    Results<(outs AnyType)> {
  let summary = "vector contraction operation";
  let description = [{
    Computes the sum of products of vector elements along contracting
    dimension pairs from 2 vectors of rank M and N respectively, adds this
    intermediate result to the accumulator argument of rank K, and returns a
    vector result of rank K (where K = num_lhs_free_dims + num_rhs_free_dims +
    num_batch_dims (see dimension type descriptions below)). For K = 0 (no
    free or batch dimensions), the accumulator and output are a scalar.

    If operands and the result have types of different bitwidths, operands are
    promoted to have the same bitwidth as the result before performing the
    contraction. For integer types, only signless integer types are supported,
    and the promotion happens via sign extension.

    An iterator type attribute list must be specified, where each element of
    the list represents an iterator with one of the following types:

    *   "reduction": reduction dimensions are present in the lhs and rhs
        arguments but not in the output (and accumulator
        argument). These are the dimensions along which the vector
        contraction op computes the sum of products, and
        contracting dimension pair dimension sizes must match
        between lhs/rhs.

    *   "parallel": Batch dimensions are iterator type "parallel", and
        are non-contracting dimensions present in the lhs, rhs and
        output. The lhs/rhs co-iterate along the batch dimensions,
        which should be expressed in their indexing maps.

        Free dimensions are iterator type "parallel", and are
        non-contraction, non-batch dimensions accessed by either the
        lhs or rhs (but not both). The lhs and rhs free dimensions
        are unrelated to each other and do not co-iterate, which
        should be expressed in their indexing maps.

    An indexing map attribute list must be specified with an entry for lhs, rhs
    and acc arguments. An indexing map attribute specifies a mapping from each
    iterator in the iterator type list, to each dimension of an N-D vector.

    An optional kind attribute may be used to specify the combining function
    between the intermediate result and accumulator argument of rank K. This
    attribute can take the values `add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui`
    /`and`/`or`/`xor` for integers, and `add`/`mul`/`minnumf`/`maxnumf`
    /`minimumf`/`maximumf` for floats. The default is `add`.

    Example:

    ```mlir
    // Simple DOT product (K = 0).
    #contraction_accesses = [
     affine_map<(i) -> (i)>,
     affine_map<(i) -> (i)>,
     affine_map<(i) -> ()>
    ]
    #contraction_trait = {
      indexing_maps = #contraction_accesses,
      iterator_types = ["reduction"]
    }
    %3 = vector.contract #contraction_trait %0, %1, %2
      : vector<10xf32>, vector<10xf32> into f32

    // 2D vector contraction with one contracting dimension (matmul, K = 2).
    #contraction_accesses = [
      affine_map<(i, j, k) -> (i, k)>,
      affine_map<(i, j, k) -> (k, j)>,
      affine_map<(i, j, k) -> (i, j)>
    ]
    #contraction_trait = {
      indexing_maps = #contraction_accesses,
      iterator_types = ["parallel", "parallel", "reduction"]
    }

    %3 = vector.contract #contraction_trait %0, %1, %2
      : vector<4x3xf32>, vector<3x7xf32> into vector<4x7xf32>

    // 4D to 3D vector contraction with two contracting dimensions and
    // one batch dimension (K = 3).
    #contraction_accesses = [
      affine_map<(b0, f0, f1, c0, c1) -> (c0, b0, c1, f0)>,
      affine_map<(b0, f0, f1, c0, c1) -> (b0, c1, c0, f1)>,
      affine_map<(b0, f0, f1, c0, c1) -> (b0, f0, f1)>
    ]
    #contraction_trait = {
      indexing_maps = #contraction_accesses,
      iterator_types = ["parallel", "parallel", "parallel",
                        "reduction", "reduction"]
    }

    %4 = vector.contract #contraction_trait %0, %1, %2
        : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32>

    // Vector contraction with mixed types. lhs/rhs have different element
    // types than accumulator/result.
    %5 = vector.contract #contraction_trait %0, %1, %2
      : vector<10xf16>, vector<10xf16> into f32

    // Contract with max (K = 0).
    #contraction_accesses = [
     affine_map<(i) -> (i)>,
     affine_map<(i) -> (i)>,
     affine_map<(i) -> ()>
    ]
    #contraction_trait = {
      indexing_maps = #contraction_accesses,
      iterator_types = ["reduction"],
      kind = #vector.kind<maxnumf>
    }
    %6 = vector.contract #contraction_trait %0, %1, %2
      : vector<10xf32>, vector<10xf32> into f32
    ```
  }];
  let builders = [
    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc,
      "ArrayAttr":$indexingMaps, "ArrayAttr":$iteratorTypes)>,
    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc,
      "ArrayRef<ArrayRef<AffineExpr>>":$indexingExprs,
      "ArrayRef<IteratorType>":$iteratorTypes)>,
    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc,
      "ArrayAttr":$indexingMaps, "ArrayAttr":$iteratorTypes,
      "CombiningKind":$kind)>
  ];
  let extraClassDeclaration = [{
    VectorType getLhsType() {
      return ::llvm::cast<VectorType>(getLhs().getType());
    }
    VectorType getRhsType() {
      return ::llvm::cast<VectorType>(getRhs().getType());
    }
    Type getAccType() { return getAcc().getType(); }
    Type getResultType() { return getResult().getType(); }
    /// Returns the names of the attributes that make up the contraction trait.
    SmallVector<StringRef> getTraitAttrNames();
    /// Position of the accumulator in the operand list.
    static unsigned getAccOperandIndex() { return 2; }

    /// Returns the indexing maps attribute materialized as AffineMaps.
    llvm::SmallVector<::mlir::AffineMap, 4> getIndexingMapsArray() {
      return llvm::to_vector<4>(getIndexingMaps().getAsValueRange<::mlir::AffineMapAttr>());
    }

    // Returns the bounds of each dimension in the iteration space spanned
    // by the iterator types of this operation.
    void getIterationBounds(SmallVectorImpl<int64_t> &iterationBounds);

    // Returns a list of index maps, where there is a list entry for each
    // op indexing map attribute (i.e. one for each input and output, with
    // the output listed last). Each index map, maps from this operations
    // iteration space, to vector dimensions of the maps input/output.
    void getIterationIndexMap(
      std::vector<DenseMap<int64_t, int64_t>> &iterationIndexMap);

    std::vector<std::pair<int64_t, int64_t>> getContractingDimMap();
    std::vector<std::pair<int64_t, int64_t>> getBatchDimMap();

    /// Combining kind used when the `kind` attribute is not specified.
    static CombiningKind getDefaultKind() {
      return CombiningKind::ADD;
    }

    /// Returns the iterator types attribute materialized as enum values.
    SmallVector<IteratorType> getIteratorTypesArray() {
      auto range =
          getIteratorTypes()
              .template getAsValueRange<IteratorTypeAttr, IteratorType>();
      return {range.begin(), range.end()};
    }
  }];

  let hasCanonicalizer = 1;
  let hasCustomAssemblyFormat = 1;
  let hasVerifier = 1;
}
216
def Vector_ReductionOp :
  Vector_Op<"reduction", [Pure,
     PredOpTrait<"source operand and result have same element type",
                 TCresVTEtIsSameAsOpBase<0, 0>>,
     OptionalTypesMatchWith<"dest and acc have the same type",
                            "dest", "acc", "::llvm::cast<Type>($_self)">,
     DeclareOpInterfaceMethods<ArithFastMathInterface>,
     DeclareOpInterfaceMethods<MaskableOpInterface>,
     DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
    ]>,
    Arguments<(ins Vector_CombiningKindAttr:$kind,
               AnyVectorOfAnyRank:$vector,
               Optional<AnyType>:$acc,
               DefaultValuedAttr<
                 Arith_FastMathAttr,
                 "::mlir::arith::FastMathFlags::none">:$fastmath)>,
    Results<(outs AnyType:$dest)> {
  let summary = "reduction operation";
  let description = [{
    Reduces a 1-D vector "horizontally" into a scalar using the given
    operation: `add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui`/`and`/`or`/`xor` for
    integers, and `add`/`mul`/`minnumf`/`maxnumf`/`minimumf`/`maximumf` for
    floats. Reductions also allow an optional fused accumulator.

    Note that these operations are restricted to 1-D vectors to remain
    close to the corresponding LLVM intrinsics:

    http://llvm.org/docs/LangRef.html#vector-reduction-intrinsics

    Example:

    ```mlir
    %1 = vector.reduction <add>, %0 : vector<16xf32> into f32

    %3 = vector.reduction <xor>, %2 : vector<4xi32> into i32

    %4 = vector.reduction <mul>, %0, %1 : vector<16xf32> into f32
    ```
  }];
  let extraClassDeclaration = [{
    /// Type of the vector being reduced.
    VectorType getSourceVectorType() {
      return ::llvm::cast<VectorType>(getVector().getType());
    }
  }];
  let builders = [
    // Builder that infers the type of `dest`.
    OpBuilder<(ins "CombiningKind":$kind, "Value":$vector, "Value":$acc,
                    CArg<"::mlir::arith::FastMathFlags",
                         "::mlir::arith::FastMathFlags::none">:$fastMathFlags)>,
    // Builder that infers the type of `dest` and has no accumulator.
    OpBuilder<(ins "CombiningKind":$kind, "Value":$vector,
                    CArg<"::mlir::arith::FastMathFlags",
                         "::mlir::arith::FastMathFlags::none">:$fastMathFlags)>
  ];

  let assemblyFormat = "$kind `,` $vector (`,` $acc^)? (`fastmath` `` $fastmath^)?"
                       " attr-dict `:` type($vector) `into` type($dest)";
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
}
277
// Reduces the dimensions listed in `reduction_dims` of an n-D source vector
// with the given combining kind; `acc` supplies the initial accumulator and
// must have the same type as the result.
def Vector_MultiDimReductionOp :
  Vector_Op<"multi_reduction", [Pure,
     AllTypesMatch<["dest", "acc"]>,
     PredOpTrait<"source operand and result have same element type",
                 TCresVTEtIsSameAsOpBase<0, 0>>,
     DeclareOpInterfaceMethods<InferTypeOpInterface>,
     DeclareOpInterfaceMethods<MaskableOpInterface>,
     DeclareOpInterfaceMethods<VectorUnrollOpInterface,
                               ["getShapeForUnroll"]>]>,
    Arguments<(ins Vector_CombiningKindAttr:$kind,
                   AnyVectorOfNonZeroRank:$source,
                   AnyType:$acc,
                   DenseI64ArrayAttr:$reduction_dims)>,
    Results<(outs AnyType:$dest)> {
  let summary = "Multi-dimensional reduction operation";
  let description = [{
    Reduces an n-D vector into an (n-k)-D vector (or a scalar when k == n)
    using the given operation: `add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui`
    /`and`/`or`/`xor` for integers, and `add`/`mul`/`minnumf`/`maxnumf`/`minimumf`
    /`maximumf` for floats.
    Takes an initial accumulator operand.

    Example:

    ```mlir
    %1 = vector.multi_reduction <add>, %0, %acc0 [1, 3] :
      vector<4x8x16x32xf32> to vector<4x16xf32>
    %2 = vector.multi_reduction <add>, %1, %acc1 [0, 1] :
      vector<4x16xf32> to f32
    ```
  }];
  let builders = [
    // Builder taking a per-dimension boolean mask (true = reduced dimension)
    // instead of a list of dimension indices.
    OpBuilder<(ins "Value":$source, "Value":$acc,
                   "ArrayRef<bool>":$reductionMask, "CombiningKind":$kind)>
  ];
  let extraClassDeclaration = [{
    VectorType getSourceVectorType() {
      return ::llvm::cast<VectorType>(getSource().getType());
    }
    Type getDestType() {
      return getDest().getType();
    }

    /// Returns true if source dimension `d` is one of the reduced dimensions.
    bool isReducedDim(int64_t d) {
      assert(d >= 0 && d < static_cast<int64_t>(getReductionMask().size()) &&
        "d overflows the number of dims");
      return getReductionMask()[d];
    }

    /// Returns a mask with one entry per source dimension, true for each
    /// dimension listed in `reduction_dims`.
    SmallVector<bool> getReductionMask() {
      SmallVector<bool> res(getSourceVectorType().getRank(), false);
      for (int64_t dim : getReductionDims())
        res[dim] = true;
      return res;
    }
    /// Static variant of the above, building the mask from explicit
    /// reduction dimensions and a source rank.
    static SmallVector<bool> getReductionMask(
        ArrayRef<int64_t> reductionDims, unsigned sourceRank) {
      SmallVector<bool> res(sourceRank, false);
      for (auto idx : reductionDims)
        res[idx] = true;
      return res;
    }
  }];
  let assemblyFormat =
    "$kind `,` $source `,` $acc attr-dict $reduction_dims `:` type($source) `to` type($dest)";
  let hasFolder = 1;
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
}
347
def Vector_BroadcastOp :
  Vector_Op<"broadcast", [Pure,
     DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
     PredOpTrait<"source operand and result have same element type",
                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
    Arguments<(ins AnyType:$source)>,
    Results<(outs AnyVectorOfAnyRank:$vector)> {
  let summary = "broadcast operation";
  let description = [{
    Broadcasts the scalar or k-D vector value in the source operand
    to a n-D result vector such that the broadcast makes sense, i.e.,
    the source operand is duplicated to match the given rank and sizes
    in the result vector. The legality rules are:
    * the source operand must have the same element type as the result type
    * a k-D vector <s_1 x .. x s_k x type> can be broadcast to
      a n-D vector <t_1 x .. x t_n x type> if
       * k <= n, and
       * the sizes in the trailing dimensions n-k < i <= n with j=i+k-n
          match exactly as s_j = t_i or s_j = 1:
       ```
           t_1 x   ..  t_n-k x t_n-k+1 x .. x t_i x .. x t_n
                               s_1     x .. x s_j x .. x s_k
               <duplication>         <potential stretch>
       ```
       * in addition, any scalable unit dimension, `[1]`, must match exactly.

    The source operand is duplicated over all the missing leading dimensions
    and stretched over the trailing dimensions where the source has a non-equal
    dimension of 1. These rules imply that any scalar broadcast (k=0) to any
    shaped vector with the same element type is always legal.

    Example:

    ```mlir
    %0 = arith.constant 0.0 : f32
    %1 = vector.broadcast %0 : f32 to vector<16xf32>
    %2 = vector.broadcast %1 : vector<16xf32> to vector<4x16xf32>
    ```
  }];
  let extraClassDeclaration = [{
    Type getSourceType() { return getSource().getType(); }
    VectorType getResultVectorType() {
      return ::llvm::cast<VectorType>(getVector().getType());
    }

    /// Return the dimensions of the result vector that were formerly ones in the
    /// source tensor and thus correspond to "dim-1" broadcasting.
    llvm::SetVector<int64_t> computeBroadcastedUnitDims();

    /// Broadcast `value` to a vector of `dstShape`, knowing that exactly the
    /// `broadcastedDims` dimensions in the dstShape are broadcasted.
    /// This requires (and asserts) that the broadcast is free of dim-1
    /// broadcasting.
    /// Since vector.broadcast only allows expanding leading dimensions, an extra
    /// vector.transpose may be inserted to make the broadcast possible.
    /// `value`, `dstShape` and `broadcastedDims` must be properly specified or
    /// the helper will assert. This means:
    ///   1. `dstShape` must not be empty.
    ///   2. `broadcastedDims` must be confined to [0 .. rank(value.getResultVectorType)]
    ///   3. `dstShape` trimmed of the dimensions specified in `broadcastedDims`
    ///      must match the `value` shape.
    static Value createOrFoldBroadcastOp(
      OpBuilder &b, Value value,
      ArrayRef<int64_t> dstShape,
      const llvm::SetVector<int64_t> &broadcastedDims);
  }];
  let assemblyFormat = "$source attr-dict `:` type($source) `to` type($vector)";
  let hasFolder = 1;
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
}
419
// Selects elements from two fixed-length input vectors along their leading
// dimension, driven by a static i64 mask; `-1` mask entries produce poison.
def Vector_ShuffleOp
    : Vector_Op<
          "shuffle",
          [Pure,
           PredOpTrait<"first operand v1 and result have same element type",
                       TCresVTEtIsSameAsOpBase<0, 0>>,
           PredOpTrait<"second operand v2 and result have same element type",
                       TCresVTEtIsSameAsOpBase<0, 1>>,
           InferTypeOpAdaptor]>,
      // Fixed-length (non-scalable) vectors only, per the description below.
      Arguments<(ins AnyFixedVectorOfAnyRank:$v1, AnyFixedVectorOfAnyRank:$v2,
          DenseI64ArrayAttr:$mask)>,
      Results<(outs AnyVectorOfNonZeroRank:$vector)> {
  let summary = "shuffle operation";
  let description = [{
    The shuffle operation constructs a permutation (or duplication) of elements
    from two input vectors, returning a vector with the same element type as
    the input and a length that is the same as the shuffle mask. The two input
    vectors must have the same element type, same rank, and trailing dimension
    sizes and shuffles their values in the leading dimension (which may differ
    in size) according to the given mask. The legality rules are:
    * the two operands must have the same element type as the result
      - Either, the two operands and the result must have the same
        rank and trailing dimension sizes, viz. given two k-D operands
                v1 : <s_1 x s_2 x .. x s_k x type> and
                v2 : <t_1 x t_2 x .. x t_k x type>
        we have s_i = t_i for all 1 < i <= k
      - Or, the two operands must be 0-D vectors and the result is a 1-D vector.
    * the mask length equals the leading dimension size of the result
    * numbering the input vector indices left to right across the operands, all
      mask values must be within range, viz. given two k-D operands v1 and v2
      above, all mask values are in the range [0,s_1+t_1). The value `-1`
      represents a poison mask value, which specifies that the selected element
      is poison.

    Note, scalable vectors are not supported.

    Example:

    ```mlir
    %0 = vector.shuffle %a, %b[0, 3]
               : vector<2xf32>, vector<2xf32>       ; yields vector<2xf32>
    %1 = vector.shuffle %c, %b[0, 1, 2]
               : vector<2x16xf32>, vector<1x16xf32> ; yields vector<3x16xf32>
    %2 = vector.shuffle %a, %b[3, 2, 1, 0]
               : vector<2xf32>, vector<2xf32>       ; yields vector<4xf32>
    %3 = vector.shuffle %a, %b[0, 1]
               : vector<f32>, vector<f32>           ; yields vector<2xf32>
    %4 = vector.shuffle %a, %b[0, 4, -1, -1, -1, -1]
               : vector<4xf32>, vector<4xf32>       ; yields vector<6xf32>
    ```
  }];

  // extraPoisonClassDeclaration contributes poison-related helpers shared by
  // ops that accept `-1` (poison) indices.
  let extraClassDeclaration = extraPoisonClassDeclaration # [{
    /// Type of the first input vector.
    VectorType getV1VectorType() {
      return ::llvm::cast<VectorType>(getV1().getType());
    }
    /// Type of the second input vector.
    VectorType getV2VectorType() {
      return ::llvm::cast<VectorType>(getV2().getType());
    }
    /// Type of the shuffled result vector.
    VectorType getResultVectorType() {
      return ::llvm::cast<VectorType>(getVector().getType());
    }
  }];

  let assemblyFormat = "operands $mask attr-dict `:` type(operands)";

  let hasFolder = 1;
  let hasVerifier = 1;
  let hasCanonicalizer = 1;
}
490
// Type constraint tying `result` to `lhs`: the result type is the lhs type
// with its trailing dimension doubled. A rank-0 lhs (vector<T>) maps to a
// 1-D result of shape {2}. Used by vector.interleave below.
def ResultIsDoubleSourceVectorType : TypesMatchWith<
    "type of 'result' is double the width of the inputs",
    "lhs", "result",
    [{
      [&]() -> ::mlir::VectorType {
        auto vectorType = ::llvm::cast<::mlir::VectorType>($_self);
        ::mlir::VectorType::Builder builder(vectorType);
        if (vectorType.getRank() == 0) {
          static constexpr int64_t v2xTyShape[] = {2};
          return builder.setShape(v2xTyShape);
        }
        auto lastDim = vectorType.getRank() - 1;
        return builder.setDim(lastDim, vectorType.getDimSize(lastDim) * 2);
      }()
    }]>;
506
// Interleaves the trailing-dimension elements of two identically-typed
// vectors; the result's trailing dimension is twice the source's (enforced
// by ResultIsDoubleSourceVectorType above in this file).
def Vector_InterleaveOp :
  Vector_Op<"interleave", [Pure, AllTypesMatch<["lhs", "rhs"]>,
    ResultIsDoubleSourceVectorType]> {
  let summary = "constructs a vector by interleaving two input vectors";
  let description = [{
    The interleave operation constructs a new vector by interleaving the
    elements from the trailing (or final) dimension of two input vectors,
    returning a new vector where the trailing dimension is twice the size.

    Note that for the n-D case this differs from the interleaving possible with
    `vector.shuffle`, which would only operate on the leading dimension.

    Another key difference is this operation supports scalable vectors, though
    currently a general LLVM lowering is limited to the case where only the
    trailing dimension is scalable.

    Example:
    ```mlir
    %a = arith.constant dense<[0, 1]> : vector<2xi32>
    %b = arith.constant dense<[2, 3]> : vector<2xi32>
    // The value of `%0` is `[0, 2, 1, 3]`.
    %0 = vector.interleave %a, %b : vector<2xi32> -> vector<4xi32>

    // Examples showing allowed input and result types.
    %1 = vector.interleave %c, %d : vector<f16> -> vector<2xf16>
    %2 = vector.interleave %e, %f : vector<6x3xf32> -> vector<6x6xf32>
    %3 = vector.interleave %g, %h : vector<[4]xi32> -> vector<[8]xi32>
    %4 = vector.interleave %i, %j : vector<2x4x[2]xf64> -> vector<2x4x[4]xf64>
    ```
  }];

  let arguments = (ins AnyVectorOfAnyRank:$lhs, AnyVectorOfAnyRank:$rhs);
  let results = (outs AnyVectorOfNonZeroRank:$result);

  let assemblyFormat = [{
    $lhs `,` $rhs  attr-dict `:` type($lhs) `->` type($result)
  }];

  let extraClassDeclaration = [{
    /// Type of the (identically-typed) input vectors; lhs is representative.
    VectorType getSourceVectorType() {
      return ::llvm::cast<VectorType>(getLhs().getType());
    }
    /// Type of the interleaved result vector.
    VectorType getResultVectorType() {
      return ::llvm::cast<VectorType>(getResult().getType());
    }
  }];
}
554
// Type constraint tying a result (named by the `result` template argument)
// to `source`: the result type is the source type with its trailing
// dimension halved. Used by vector.deinterleave below.
class ResultIsHalfSourceVectorType<string result> : TypesMatchWith<
  "the trailing dimension of the results is half the width of source trailing dimension",
  "source", result,
  [{
    [&]() -> ::mlir::VectorType {
      auto vectorType = ::llvm::cast<mlir::VectorType>($_self);
      ::mlir::VectorType::Builder builder(vectorType);
      auto lastDim = vectorType.getRank() - 1;
      auto newDimSize = vectorType.getDimSize(lastDim) / 2;
      if (newDimSize <= 0)
         return vectorType; // (invalid input type)
      return builder.setDim(lastDim, newDimSize);
    }()
  }]
>;
570
// Predicate trait: the trailing dimension of the op's source vector must
// hold an even number of elements (so it can be split in two exact halves).
def SourceVectorEvenElementCount : PredOpTrait<
  "the trailing dimension of the source vector has an even number of elements",
  CPred<[{
    [&](){
      auto srcVec = getSourceVectorType();
      return srcVec.getDimSize(srcVec.getRank() - 1) % 2 == 0;
    }()
  }]>
>;
580
// Splits a vector into its even-indexed and odd-indexed trailing-dimension
// elements; the inverse of vector.interleave. Both results have the source
// type with the trailing dimension halved.
def Vector_DeinterleaveOp :
  Vector_Op<"deinterleave", [Pure,
    SourceVectorEvenElementCount,
    ResultIsHalfSourceVectorType<"res1">,
    AllTypesMatch<["res1", "res2"]>
    ]> {
      let summary = "constructs two vectors by deinterleaving an input vector";
      let description = [{
        The deinterleave operation constructs two vectors from a single input
        vector. The first result vector contains the elements from even indexes
        of the input, and the second contains elements from odd indexes. This is
        the inverse of a `vector.interleave` operation.

        Each output's trailing dimension is half of the size of the input
        vector's trailing dimension. This operation requires the input vector
        to have a rank > 0 and an even number of elements in its trailing
        dimension.

        The operation supports scalable vectors.

        Example:
        ```mlir
        %0, %1 = vector.deinterleave %a
                   : vector<8xi8> -> vector<4xi8>
        %2, %3 = vector.deinterleave %b
                   : vector<2x8xi8> -> vector<2x4xi8>
        %4, %5 = vector.deinterleave %c
                   : vector<2x8x4xi8> -> vector<2x8x2xi8>
        %6, %7 = vector.deinterleave %d
                   : vector<[8]xf32> -> vector<[4]xf32>
        %8, %9 = vector.deinterleave %e
                   : vector<2x[6]xf64> -> vector<2x[3]xf64>
        %10, %11 = vector.deinterleave %f
                   : vector<2x4x[6]xf64> -> vector<2x4x[3]xf64>
        ```
      }];

      let arguments = (ins AnyVectorOfNonZeroRank:$source);
      let results = (outs AnyVectorOfNonZeroRank:$res1, AnyVectorOfNonZeroRank:$res2);

      // Only type($res1) is printed; res2's type is equal by AllTypesMatch.
      let assemblyFormat = [{
        $source attr-dict `:` type($source) `->` type($res1)
      }];

      let extraClassDeclaration = [{
        /// Type of the vector being deinterleaved.
        VectorType getSourceVectorType() {
          return ::llvm::cast<VectorType>(getSource().getType());
        }
        /// Type shared by both result vectors (res1 is representative).
        VectorType getResultVectorType() {
          return ::llvm::cast<VectorType>(getRes1().getType());
        }
      }];
    }
634
def Vector_ExtractElementOp :
  Vector_Op<"extractelement", [Pure,
     DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
     TypesMatchWith<"result type matches element type of vector operand",
                    "vector", "result",
                    "::llvm::cast<VectorType>($_self).getElementType()">]>,
    Arguments<(ins AnyVectorOfAnyRank:$vector,
                   Optional<AnySignlessIntegerOrIndex>:$position)>,
    Results<(outs AnyType:$result)> {
  let summary = "extractelement operation";
  let description = [{
    Takes a 0-D or 1-D vector and an optional dynamic index position and
    extracts the scalar at that position.

    Note that this instruction resembles vector.extract, but is restricted to
    0-D and 1-D vectors and relaxed to dynamic indices.
    If the vector is 0-D, the position must be std::nullopt.

    It is meant to be closer to LLVM's version:
    https://llvm.org/docs/LangRef.html#extractelement-instruction

    Example:

    ```mlir
    %c = arith.constant 15 : i32
    %1 = vector.extractelement %0[%c : i32]: vector<16xf32>
    %2 = vector.extractelement %z[]: vector<f32>
    ```
  }];
  let assemblyFormat = [{
    $vector `[` ($position^ `:` type($position))? `]` attr-dict `:` type($vector)
  }];

  let builders = [
    // 0-D builder.
    OpBuilder<(ins "Value":$source)>,
  ];
  let extraClassDeclaration = [{
    /// Type of the vector being indexed.
    VectorType getSourceVectorType() {
      return ::llvm::cast<VectorType>(getVector().getType());
    }
  }];
  let hasVerifier = 1;
  let hasFolder = 1;
}
681
// Extracts an (n-k)-D sub-vector (or scalar) from an n-D vector at a mixed
// static/dynamic k-D position; `-1` static indices denote poison.
def Vector_ExtractOp :
  Vector_Op<"extract", [Pure,
     DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
     PredOpTrait<"operand and result have same element type",
                 TCresVTEtIsSameAsOpBase<0, 0>>,
     InferTypeOpAdaptorWithIsCompatible]> {
  let summary = "extract operation";
  let description = [{
    Takes an n-D vector and a k-D position and extracts the (n-k)-D vector at
    the proper position. Degenerates to an element type if n-k is zero.

    Static and dynamic indices must be greater or equal to zero and less than
    the size of the corresponding dimension. The result is undefined if any
    index is out-of-bounds. The value `-1` represents a poison index, which
    specifies that the extracted element is poison.

    Example:

    ```mlir
    %1 = vector.extract %0[3]: vector<8x16xf32> from vector<4x8x16xf32>
    %2 = vector.extract %0[2, 1, 3]: f32 from vector<4x8x16xf32>
    %3 = vector.extract %1[]: vector<f32> from vector<f32>
    %4 = vector.extract %0[%a, %b, %c]: f32 from vector<4x8x16xf32>
    %5 = vector.extract %0[2, %b]: vector<16xf32> from vector<4x8x16xf32>
    %6 = vector.extract %10[-1, %c]: f32 from vector<4x16xf32>
    ```
  }];

  // Dynamic indices occupy `dynamic_position` operands; `static_position`
  // holds the full index list with a sentinel for each dynamic entry.
  let arguments = (ins
    AnyVectorOfAnyRank:$vector,
    Variadic<Index>:$dynamic_position,
    DenseI64ArrayAttr:$static_position
  );
  let results = (outs AnyType:$result);

  let builders = [
    OpBuilder<(ins "Value":$source, "int64_t":$position)>,
    OpBuilder<(ins "Value":$source, "OpFoldResult":$position)>,
    OpBuilder<(ins "Value":$source, "ArrayRef<int64_t>":$position)>,
    OpBuilder<(ins "Value":$source, "ArrayRef<OpFoldResult>":$position)>,
  ];

  // extraPoisonClassDeclaration contributes poison-related helpers shared by
  // ops that accept `-1` (poison) indices.
  let extraClassDeclaration = extraPoisonClassDeclaration # [{
    /// Type of the vector being indexed.
    VectorType getSourceVectorType() {
      return ::llvm::cast<VectorType>(getVector().getType());
    }

    /// Return a vector with all the static and dynamic position indices.
    SmallVector<OpFoldResult> getMixedPosition() {
      OpBuilder builder(getContext());
      return getMixedValues(getStaticPosition(), getDynamicPosition(), builder);
    }

    /// Total number of position indices (static entries cover dynamic ones).
    unsigned getNumIndices() {
      return getStaticPosition().size();
    }

    /// Return "true" if the op has at least one dynamic position.
    bool hasDynamicPosition() {
      return !getDynamicPosition().empty();
    }
  }];

  let assemblyFormat = [{
    $vector ``
    custom<DynamicIndexList>($dynamic_position, $static_position)
    attr-dict `:` type($result) `from` type($vector)
  }];

  let hasCanonicalizer = 1;
  let hasFolder = 1;
  let hasVerifier = 1;
}
755
def Vector_FMAOp :
  Op<Vector_Dialect, "fma", [
       Pure, AllTypesMatch<["lhs", "rhs", "acc", "result"]>,
       DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
     ] # ElementwiseMappable.traits>,
    Arguments<(ins VectorOfAnyRankOf<[AnyFloat]>:$lhs,
                   VectorOfAnyRankOf<[AnyFloat]>:$rhs,
                   VectorOfAnyRankOf<[AnyFloat]>:$acc)>,
    Results<(outs VectorOfAnyRankOf<[AnyFloat]>:$result)> {
  let summary = "vector fused multiply-add";
  let description = [{
    Multiply-add expressions operate on n-D vectors and compute a fused
    pointwise multiply-and-accumulate: `$result = $lhs * $rhs + $acc`.
    All operands and result have the same vector type. The semantics
    of the operation correspond to those of the `llvm.fma`
    [intrinsic](https://llvm.org/docs/LangRef.html#int-fma). In the
    particular case of lowering to LLVM, this is guaranteed to lower
    to the `llvm.fma.*` intrinsic.

    Example:

    ```mlir
    %3 = vector.fma %0, %1, %2: vector<8x16xf32>
    ```
  }];
  // Only the lhs type is spelled in the assembly; AllTypesMatch infers the
  // rhs, acc, and result types from it.
  let assemblyFormat = "$lhs `,` $rhs `,` $acc attr-dict `:` type($lhs)";
  let extraClassDeclaration = [{
    /// All operands and the result share one vector type; use the lhs type.
    VectorType getVectorType() { return ::llvm::cast<VectorType>(getLhs().getType()); }
  }];
}
786
def Vector_FromElementsOp : Vector_Op<"from_elements", [
    Pure,
    TypesMatchWith<"operand types match result element type",
                   "result", "elements", "SmallVector<Type>("
                   "::llvm::cast<VectorType>($_self).getNumElements(), "
                   "::llvm::cast<VectorType>($_self).getElementType())">]> {
  let summary = "operation that defines a vector from scalar elements";
  let description = [{
    This operation defines a vector from one or multiple scalar elements. The
    number of elements must match the number of elements in the result type.
    All elements must have the same type, which must match the element type of
    the result vector type.

    `elements` are a flattened version of the result vector in row-major order.

    Example:

    ```mlir
    // %f1
    %0 = vector.from_elements %f1 : vector<f32>
    // [%f1, %f2]
    %1 = vector.from_elements %f1, %f2 : vector<2xf32>
    // [[%f1, %f2, %f3], [%f4, %f5, %f6]]
    %2 = vector.from_elements %f1, %f2, %f3, %f4, %f5, %f6 : vector<2x3xf32>
    // [[[%f1, %f2]], [[%f3, %f4]], [[%f5, %f6]]]
    %3 = vector.from_elements %f1, %f2, %f3, %f4, %f5, %f6 : vector<3x1x2xf32>
    ```

    Note, scalable vectors are not supported.
  }];

  let arguments = (ins Variadic<AnyType>:$elements);
  // Fixed-rank result only: a scalable vector's element count is unknown at
  // compile time, so it cannot be built from a static list of elements.
  let results = (outs AnyFixedVectorOfAnyRank:$result);
  let assemblyFormat = "$elements attr-dict `:` type($result)";
  let hasCanonicalizer = 1;
}
823
def Vector_InsertElementOp :
  Vector_Op<"insertelement", [Pure,
     DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
     TypesMatchWith<"source operand type matches element type of result",
                    "result", "source",
                    "::llvm::cast<VectorType>($_self).getElementType()">,
     AllTypesMatch<["dest", "result"]>]>,
     Arguments<(ins AnyType:$source, AnyVectorOfAnyRank:$dest,
                    Optional<AnySignlessIntegerOrIndex>:$position)>,
     Results<(outs AnyVectorOfAnyRank:$result)> {
  let summary = "insertelement operation";
  let description = [{
    Takes a scalar source, a 0-D or 1-D destination vector and a dynamic index
    position and inserts the source into the destination at the proper position.

    Note that this instruction resembles vector.insert, but is restricted to 0-D
    and 1-D vectors and relaxed to dynamic indices.

    It is meant to be closer to LLVM's version:
    https://llvm.org/docs/LangRef.html#insertelement-instruction

    Example:

    ```mlir
    %c = arith.constant 15 : i32
    %f = arith.constant 0.0 : f32
    %1 = vector.insertelement %f, %0[%c : i32]: vector<16xf32>
    %2 = vector.insertelement %f, %z[]: vector<f32>
    ```
  }];
  // The position (and its type) is omitted for the 0-D case.
  let assemblyFormat = [{
    $source `,` $dest `[` ($position^ `:` type($position))? `]`  attr-dict `:`
    type($result)
  }];

  let builders = [
    // 0-D builder.
    OpBuilder<(ins "Value":$source, "Value":$dest)>,
  ];
  let extraClassDeclaration = [{
    Type getSourceType() { return getSource().getType(); }
    VectorType getDestVectorType() {
      return ::llvm::cast<VectorType>(getDest().getType());
    }
  }];
  let hasVerifier = 1;
  let hasFolder = 1;
}
872
def Vector_InsertOp :
  Vector_Op<"insert", [Pure,
     DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
     PredOpTrait<"source operand and result have same element type",
                 TCresVTEtIsSameAsOpBase<0, 0>>,
     AllTypesMatch<["dest", "result"]>]> {
  let summary = "insert operation";
  let description = [{
    Takes an n-D source vector, an (n+k)-D destination vector and a k-D position
    and inserts the n-D source into the (n+k)-D destination at the proper
    position. Degenerates to a scalar or a 0-d vector source type when n = 0.

    Static and dynamic indices must be greater or equal to zero and less than
    the size of the corresponding dimension. The result is undefined if any
    index is out-of-bounds. The value `-1` represents a poison index, which
    specifies that the resulting vector is poison.

    Example:

    ```mlir
    %2 = vector.insert %0, %1[3] : vector<8x16xf32> into vector<4x8x16xf32>
    %5 = vector.insert %3, %4[2, 1, 3] : f32 into vector<4x8x16xf32>
    %8 = vector.insert %6, %7[] : f32 into vector<f32>
    %11 = vector.insert %9, %10[%a, %b, %c] : vector<f32> into vector<4x8x16xf32>
    %12 = vector.insert %4, %10[2, %b] : vector<16xf32> into vector<4x8x16xf32>
    %13 = vector.insert %20, %1[-1, %c] : f32 into vector<4x16xf32>
    ```
  }];

  // Positions are split into a static attribute and dynamic SSA operands;
  // dynamic entries are marked with a sentinel in `static_position`.
  let arguments = (ins
    AnyType:$source,
    AnyVectorOfAnyRank:$dest,
    Variadic<Index>:$dynamic_position,
    DenseI64ArrayAttr:$static_position
  );
  let results = (outs AnyVectorOfAnyRank:$result);

  let builders = [
    OpBuilder<(ins "Value":$source, "Value":$dest, "int64_t":$position)>,
    OpBuilder<(ins "Value":$source, "Value":$dest, "OpFoldResult":$position)>,
    OpBuilder<(ins "Value":$source, "Value":$dest, "ArrayRef<int64_t>":$position)>,
    OpBuilder<(ins "Value":$source, "Value":$dest, "ArrayRef<OpFoldResult>":$position)>,
  ];

  let extraClassDeclaration = extraPoisonClassDeclaration # [{
    Type getSourceType() { return getSource().getType(); }
    VectorType getDestVectorType() {
      return ::llvm::cast<VectorType>(getDest().getType());
    }

    /// Return a vector with all the static and dynamic position indices.
    SmallVector<OpFoldResult> getMixedPosition() {
      OpBuilder builder(getContext());
      return getMixedValues(getStaticPosition(), getDynamicPosition(), builder);
    }

    unsigned getNumIndices() {
      return getStaticPosition().size();
    }

    /// Return "true" if the op has at least one dynamic position.
    /// Note: operand values in a variadic range are never null, so checking
    /// for emptiness is sufficient (and matches vector.extract).
    bool hasDynamicPosition() {
      return !getDynamicPosition().empty();
    }
  }];

  let assemblyFormat = [{
    $source `,` $dest custom<DynamicIndexList>($dynamic_position, $static_position)
    attr-dict `:` type($source) `into` type($dest)
  }];

  let hasCanonicalizer = 1;
  let hasFolder = 1;
  let hasVerifier = 1;
}
948
def Vector_ScalableInsertOp :
  Vector_Op<"scalable.insert", [Pure,
       AllElementTypesMatch<["source", "dest"]>,
       AllTypesMatch<["dest", "res"]>,
       PredOpTrait<"position is a multiple of the source length.",
        CPred<
          "(getPos() % getSourceVectorType().getNumElements()) == 0"
        >>]>,
     Arguments<(ins VectorOfRank<[1]>:$source,
                    ScalableVectorOfRank<[1]>:$dest,
                    I64Attr:$pos)>,
     Results<(outs ScalableVectorOfRank<[1]>:$res)> {
  let summary = "insert subvector into scalable vector operation";
  // NOTE: This operation is designed to map to `llvm.vector.insert`, and its
  //       documentation should be kept aligned with LLVM IR:
  //       https://llvm.org/docs/LangRef.html#llvm-vector-insert-intrinsic
  let description = [{
    This operation takes a rank-1 fixed-length or scalable subvector and
    inserts it within the destination scalable vector starting from the
    position specified by `pos`. If the source vector is scalable, the
    insertion position will be scaled by the runtime scaling factor of the
    source subvector.

    The insertion position must be a multiple of the minimum size of the source
    vector. For the operation to be well defined, the source vector must fit in
    the destination vector from the specified position. Since the destination
    vector is scalable and its runtime length is unknown, the validity of the
    operation can't be verified nor guaranteed at compile time.

    Example:

    ```mlir
    %2 = vector.scalable.insert %0, %1[8] : vector<4xf32> into vector<[16]xf32>
    %5 = vector.scalable.insert %3, %4[0] : vector<8xf32> into vector<[4]xf32>
    %8 = vector.scalable.insert %6, %7[0] : vector<[4]xf32> into vector<[8]xf32>
    ```

    Invalid example:
    ```mlir
    %2 = vector.scalable.insert %0, %1[5] : vector<4xf32> into vector<[16]xf32>
    ```
  }];

  let assemblyFormat = [{
    $source `,` $dest `[` $pos `]` attr-dict `:` type($source) `into` type($dest)
  }];

  let extraClassDeclaration = extraPoisonClassDeclaration # [{
    VectorType getSourceVectorType() {
      return ::llvm::cast<VectorType>(getSource().getType());
    }
    VectorType getDestVectorType() {
      return ::llvm::cast<VectorType>(getDest().getType());
    }
  }];
}
1005
def Vector_ScalableExtractOp :
  Vector_Op<"scalable.extract", [Pure,
       AllElementTypesMatch<["source", "res"]>,
       PredOpTrait<"position is a multiple of the result length.",
        CPred<
          "(getPos() % getResultVectorType().getNumElements()) == 0"
        >>]>,
     Arguments<(ins ScalableVectorOfRank<[1]>:$source,
                    I64Attr:$pos)>,
     Results<(outs VectorOfRank<[1]>:$res)> {
  let summary = "extract subvector from scalable vector operation";
  // NOTE: This operation is designed to map to `llvm.vector.extract`, and its
  //       documentation should be kept aligned with LLVM IR:
  //       https://llvm.org/docs/LangRef.html#llvm-vector-extract-intrinsic
  let description = [{
    Takes a rank-1 source vector and a position `pos` within the source
    vector, and extracts a subvector starting from that position.

    The extraction position must be a multiple of the minimum size of the result
    vector. For the operation to be well defined, the destination vector must
    fit within the source vector from the specified position. Since the source
    vector is scalable and its runtime length is unknown, the validity of the
    operation can't be verified nor guaranteed at compile time.

    Example:

    ```mlir
    %1 = vector.scalable.extract %0[8] : vector<4xf32> from vector<[8]xf32>
    %3 = vector.scalable.extract %2[0] : vector<[4]xf32> from vector<[8]xf32>
    ```

    Invalid example:
    ```mlir
    %1 = vector.scalable.extract %0[5] : vector<4xf32> from vector<[16]xf32>
    ```
  }];

  let assemblyFormat = [{
    $source `[` $pos `]` attr-dict `:` type($res) `from` type($source)
  }];

  let extraClassDeclaration = extraPoisonClassDeclaration # [{
    VectorType getSourceVectorType() {
      return ::llvm::cast<VectorType>(getSource().getType());
    }
    VectorType getResultVectorType() {
      return ::llvm::cast<VectorType>(getRes().getType());
    }
  }];
}
1056
def Vector_InsertStridedSliceOp :
  Vector_Op<"insert_strided_slice", [Pure,
    PredOpTrait<"operand #0 and result have same element type",
                 TCresVTEtIsSameAsOpBase<0, 0>>,
    AllTypesMatch<["dest", "res"]>]>,
    Arguments<(ins AnyVectorOfNonZeroRank:$source, AnyVectorOfNonZeroRank:$dest, I64ArrayAttr:$offsets,
               I64ArrayAttr:$strides)>,
    Results<(outs AnyVectorOfNonZeroRank:$res)> {
  let summary = "strided_slice operation";
  let description = [{
    Takes a k-D source vector, an n-D destination vector (n >= k), n-sized
    `offsets` integer array attribute, a k-sized `strides` integer array attribute
    and inserts the k-D source vector as a strided subvector at the proper offset
    into the n-D destination vector.

    At the moment strides must contain only 1s.

    Returns an n-D vector that is a copy of the n-D destination vector in which
    the last k-D dimensions contain the k-D source vector elements strided at
    the proper location as specified by the offsets.

    Example:

    ```mlir
    %2 = vector.insert_strided_slice %0, %1
        {offsets = [0, 0, 2], strides = [1, 1]}:
      vector<2x4xf32> into vector<16x4x8xf32>
    ```
  }];

  let assemblyFormat = [{
    $source `,` $dest attr-dict `:` type($source) `into` type($dest)
  }];

  let builders = [
    OpBuilder<(ins "Value":$source, "Value":$dest,
      "ArrayRef<int64_t>":$offsets, "ArrayRef<int64_t>":$strides)>
  ];
  let extraClassDeclaration = [{
    VectorType getSourceVectorType() {
      return ::llvm::cast<VectorType>(getSource().getType());
    }
    VectorType getDestVectorType() {
      return ::llvm::cast<VectorType>(getDest().getType());
    }
    /// Return "true" if any stride differs from 1 (currently rejected by the
    /// verifier; see "strides must contain only 1s" above).
    bool hasNonUnitStrides() {
      return llvm::any_of(getStrides(), [](Attribute attr) {
        return ::llvm::cast<IntegerAttr>(attr).getInt() != 1;
      });
    }
  }];

  let hasFolder = 1;
  let hasVerifier = 1;
  let hasCanonicalizer = 1;
}
1113
def Vector_OuterProductOp :
  Vector_Op<"outerproduct", [Pure,
    PredOpTrait<"lhs operand and result have same element type",
                TCresVTEtIsSameAsOpBase<0, 0>>,
    PredOpTrait<"rhs operand and result have same element type",
                TCresVTEtIsSameAsOpBase<0, 1>>,
    DeclareOpInterfaceMethods<MaskableOpInterface>]>,
    Arguments<(ins AnyVectorOfNonZeroRank:$lhs, AnyType:$rhs,
               Optional<AnyVectorOfNonZeroRank>:$acc,
               DefaultValuedAttr<Vector_CombiningKindAttr, "CombiningKind::ADD">:$kind)>,
    Results<(outs AnyVectorOfNonZeroRank)> {
  let summary = "vector outerproduct with optional fused add";
  let description = [{
    Takes 2 1-D vectors and returns the 2-D vector containing the outer-product,
    as illustrated below:
    ```
     outer |   [c, d]
     ------+------------
       [a, | [ [a*c, a*d],
        b] |   [b*c, b*d] ]
    ```
    This operation also accepts a 1-D vector lhs and a scalar rhs. In this
    case a simple AXPY operation is performed, which returns a 1-D vector.
    ```
        [a, b] * c = [a*c, b*c]
    ```

    An optional extra vector argument with the same shape as the output
    vector may be specified in which case the operation returns the sum of
    the outer-product and the extra vector. In this multiply-accumulate
    scenario for floating-point arguments, the rounding mode is enforced
    by guaranteeing that a fused-multiply add operation is emitted. When
    lowered to the LLVMIR dialect, this form emits `llvm.intr.fma`, which
    is guaranteed to lower to actual `fma` instructions on x86.

    An optional kind attribute may be specified to be: `add`/`mul`/`minsi`
    /`minui`/`maxsi`/`maxui`/`and`/`or`/`xor` for integers, and `add`/`mul`
    /`minnumf`/`maxnumf`/`minimumf`/`maximumf` for floats. The default is
    `add`.

    Example:

    ```
    %2 = vector.outerproduct %0, %1: vector<4xf32>, vector<8xf32>
    return %2: vector<4x8xf32>

    %3 = vector.outerproduct %0, %1, %2:
      vector<4xf32>, vector<8xf32>, vector<4x8xf32>
    return %3: vector<4x8xf32>

    %4 = vector.outerproduct %0, %1, %2 {kind = #vector.kind<maxnumf>}:
      vector<4xf32>, vector<8xf32>, vector<4x8xf32>
    return %4: vector<4x8xf32>

    %6 = vector.outerproduct %4, %5: vector<10xf32>, f32
    return %6: vector<10xf32>

    ```
  }];
  let builders = [
    // Build an op without mask, use the type of `acc` as the return type.
    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc)>
  ];
  let extraClassDeclaration = [{
    VectorType getOperandVectorTypeLHS() {
      return ::llvm::cast<VectorType>(getLhs().getType());
    }
    // The rhs may be a vector or a scalar (AXPY case), hence plain Type.
    Type getOperandTypeRHS() {
      return getRhs().getType();
    }
    // Returns a null VectorType when no accumulator was provided.
    VectorType getOperandVectorTypeACC() {
      return getAcc()
        ? ::llvm::cast<VectorType>(getAcc().getType())
        : VectorType();
    }
    VectorType getResultVectorType() {
      return ::llvm::cast<VectorType>(getResult().getType());
    }
    static CombiningKind getDefaultKind() {
      return CombiningKind::ADD;
    }
  }];
  let hasCustomAssemblyFormat = 1;
  let hasVerifier = 1;
}
1199
def Vector_ExtractStridedSliceOp :
  Vector_Op<"extract_strided_slice", [Pure,
    PredOpTrait<"operand and result have same element type",
                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
    Arguments<(ins AnyVectorOfNonZeroRank:$vector, I64ArrayAttr:$offsets,
               I64ArrayAttr:$sizes, I64ArrayAttr:$strides)>,
    Results<(outs AnyVectorOfNonZeroRank)> {
  let summary = "extract_strided_slice operation";
  let description = [{
    Takes an n-D vector, a k-sized `offsets` integer array attribute, a k-sized
    `sizes` integer array attribute, a k-sized `strides` integer array
    attribute and extracts the n-D subvector at the proper offset.

    At the moment strides must contain only 1s.

    Returns an n-D vector where the first k dimensions match the `sizes`
    attribute. The returned subvector contains the elements starting at offset
    `offsets` and ending at `offsets + sizes`.

    Example:

    ```mlir
    %1 = vector.extract_strided_slice %0
        {offsets = [0, 2], sizes = [2, 4], strides = [1, 1]}:
      vector<4x8x16xf32> to vector<2x4x16xf32>

    // TODO: Evolve to a range form syntax similar to:
    %1 = vector.extract_strided_slice %0[0:2:1][2:4:1]
      vector<4x8x16xf32> to vector<2x4x16xf32>
    ```

    TODO: Implement support for poison indices.
  }];
  let builders = [
    OpBuilder<(ins "Value":$source, "ArrayRef<int64_t>":$offsets,
      "ArrayRef<int64_t>":$sizes, "ArrayRef<int64_t>":$strides)>
  ];
  let extraClassDeclaration = [{
    VectorType getSourceVectorType() {
      return ::llvm::cast<VectorType>(getVector().getType());
    }
    void getOffsets(SmallVectorImpl<int64_t> &results);
    /// Return "true" if any stride differs from 1 (currently rejected by the
    /// verifier; see "strides must contain only 1s" above).
    bool hasNonUnitStrides() {
      return llvm::any_of(getStrides(), [](Attribute attr) {
        return ::llvm::cast<IntegerAttr>(attr).getInt() != 1;
      });
    }
  }];
  let hasCanonicalizer = 1;
  let hasFolder = 1;
  let hasVerifier = 1;
  let assemblyFormat = "$vector attr-dict `:` type($vector) `to` type(results)";
}
1253
1254// TODO: Tighten semantics so that masks and inbounds can't be used
1255// simultaneously within the same transfer op.
def Vector_TransferReadOp :
  Vector_Op<"transfer_read", [
      DeclareOpInterfaceMethods<VectorTransferOpInterface>,
      DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>,
      DeclareOpInterfaceMethods<MaskableOpInterface>,
      DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
      DeclareOpInterfaceMethods<ConditionallySpeculatable>,
      AttrSizedOperandSegments,
      DestinationStyleOpInterface
    ]>,
    Arguments<(ins AnyShaped:$source,
                   Variadic<Index>:$indices,
                   AffineMapAttr:$permutation_map,
                   AnyType:$padding,
                   Optional<VectorOfNonZeroRankOf<[I1]>>:$mask,
                   BoolArrayAttr:$in_bounds)>,
    Results<(outs AnyVectorOfAnyRank:$vector)> {

  let summary = "Reads a supervector from memory into an SSA vector value.";

  let description = [{
    The `vector.transfer_read` op performs a read from a slice within a
    [MemRef](../LangRef.md#memref-type) or a Ranked
    [Tensor](../LangRef.md#tensor-type) supplied as its first operand
    into a [vector](../LangRef.md#vector-type) of the same base elemental type.

    A memref/tensor operand with vector element type, must have its vector
    element type match a suffix (shape and element type) of the vector (e.g.
    memref<3x2x6x4x3xf32>, vector<1x1x4x3xf32>).

    The slice is further defined by a full-rank index within the MemRef/Tensor,
    supplied as the operands `[1 .. 1 + rank(memref/tensor))` that defines the
    starting point of the transfer (e.g. `%A[%i0, %i1, %i2]`).

    The permutation_map [attribute](../LangRef.md#attributes) is an
    [affine-map](Affine.md#affine-maps) which specifies the transposition on the
    slice to match the vector shape. The permutation map may be implicit and
    omitted from parsing and printing if it is the canonical minor identity map
    (i.e. if it does not permute or broadcast any dimension).

    The size of the slice is specified by the size of the vector, given as the
    return type.

    An SSA value `padding` of the same elemental type as the MemRef/Tensor is
    provided to specify a fallback value in the case of out-of-bounds accesses
    and/or masking.

    An optional SSA value `mask` may be specified to mask out elements read from
    the MemRef/Tensor. The `mask` type is an `i1` vector with a shape that
    matches how elements are read from the MemRef/Tensor, *before* any
    permutation or broadcasting. Elements whose corresponding mask element is
    `0` are masked out and replaced with `padding`.

    For every vector dimension, the boolean array attribute `in_bounds`
    specifies if the transfer is guaranteed to be within the source bounds. If
    set to "false", accesses (including the starting point) may run
    out-of-bounds along the respective vector dimension as the index increases.
    Non-vector dimensions *must* always be in-bounds. The `in_bounds` array
    length has to be equal to the vector rank. This attribute has a default
    value: `false` (i.e. "out-of-bounds"). When skipped in the textual IR, the
    default value is assumed. Similarly, the OP printer will omit this
    attribute when all dimensions are out-of-bounds (i.e. the default value is
    used).

    A `vector.transfer_read` can be lowered to a simple load if all dimensions
    are specified to be within bounds and no `mask` was specified.

    This operation is called 'read' by opposition to 'load' because the
    super-vector granularity is generally not representable with a single
    hardware register. A `vector.transfer_read` is thus a mid-level abstraction
    that supports super-vectorization with non-effecting padding for full-tile
    only operations.

    More precisely, let's dive deeper into the permutation_map for the following
    MLIR:

    ```mlir
    vector.transfer_read %A[%expr1, %expr2, %expr3, %expr4]
      { permutation_map : (d0,d1,d2,d3) -> (d2,0,d0) } :
      memref<?x?x?x?xf32>, vector<3x4x5xf32>
    ```

    This operation always reads a slice starting at `%A[%expr1, %expr2, %expr3,
    %expr4]`. The size of the slice can be inferred from the resulting vector
    shape and walking back through the permutation map: 3 along d2 and 5 along
    d0, so the slice is: `%A[%expr1 : %expr1 + 5, %expr2, %expr3:%expr3 + 3, %expr4]`

    That slice needs to be read into a `vector<3x4x5xf32>`. Since the
    permutation map is not full rank, there must be a broadcast along vector
    dimension `1`.

    A notional lowering of vector.transfer_read could generate code resembling:

    ```mlir
    // %expr1, %expr2, %expr3, %expr4 defined before this point
    // alloc a temporary buffer for performing the "gather" of the slice.
    %tmp = memref.alloc() : memref<vector<3x4x5xf32>>
    for %i = 0 to 3 {
      affine.for %j = 0 to 4 {
        affine.for %k = 0 to 5 {
          // Note that this load does not involve %j.
          %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
          // Update the temporary gathered slice with the individual element
          %slice = memref.load %tmp : memref<vector<3x4x5xf32>> -> vector<3x4x5xf32>
          %updated = vector.insert %a, %slice[%i, %j, %k] : f32 into vector<3x4x5xf32>
          memref.store %updated, %tmp : memref<vector<3x4x5xf32>>
    }}}
    // At this point we gathered the elements from the original
    // memref into the desired vector layout, stored in the `%tmp` allocation.
    %vec = memref.load %tmp : memref<vector<3x4x5xf32>> -> vector<3x4x5xf32>
    ```

    On a GPU one could then map `i`, `j`, `k` to blocks and threads. Notice that
    the temporary storage footprint could conceptually be only `3 * 5` values but
    `3 * 4 * 5` values are actually transferred between `%A` and `%tmp`.

    Alternatively, if a notional vector broadcast operation were available, we
    could avoid the loop on `%j` and the lowered code would resemble:

    ```mlir
    // %expr1, %expr2, %expr3, %expr4 defined before this point
    %tmp = memref.alloc() : memref<vector<3x4x5xf32>>
    for %i = 0 to 3 {
      affine.for %k = 0 to 5 {
        %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
        %slice = memref.load %tmp : memref<vector<3x4x5xf32>> -> vector<3x4x5xf32>
        // Here we only store to the first element in dimension one
        %updated = vector.insert %a, %slice[%i, 0, %k] : f32 into vector<3x4x5xf32>
        memref.store %updated, %tmp : memref<vector<3x4x5xf32>>
    }}
    // At this point we gathered the elements from the original
    // memref into the desired vector layout, stored in the `%tmp` allocation.
    // However we haven't replicated them alongside the first dimension, we need
    // to broadcast now.
    %partialVec = load %tmp : memref<vector<3x4x5xf32>> -> vector<3x4x5xf32>
    %vec = broadcast %partialVec, 1 : vector<3x4x5xf32>
    ```

    where `broadcast` broadcasts from element 0 to all others along the
    specified dimension. This time, the number of loaded element is `3 * 5`
    values.
    An additional `1` broadcast is required. On a GPU this broadcast could be
    implemented using a warp-shuffle if loop `j` were mapped to `threadIdx.x`.

    Syntax
    ```
    operation ::= ssa-id `=` `vector.transfer_read` ssa-use-list
      `{` attribute-entry `} :` memref-type `,` vector-type
    ```

    Example:

    ```mlir
    // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32>
    // and pad with %f0 to handle the boundary case:
    %f0 = arith.constant 0.0 : f32
    affine.for %i0 = 0 to %0 {
      affine.for %i1 = 0 to %1 step 256 {
        affine.for %i2 = 0 to %2 step 32 {
          %v = vector.transfer_read %A[%i0, %i1, %i2], (%f0)
               {permutation_map: (d0, d1, d2) -> (d2, d1)} :
               memref<?x?x?xf32>, vector<32x256xf32>
    }}}

    // or equivalently (rewrite with vector.transpose)
    %f0 = arith.constant 0.0 : f32
    affine.for %i0 = 0 to %0 {
      affine.for %i1 = 0 to %1 step 256 {
        affine.for %i2 = 0 to %2 step 32 {
          %v0 = vector.transfer_read %A[%i0, %i1, %i2], (%f0)
               {permutation_map: (d0, d1, d2) -> (d1, d2)} :
               memref<?x?x?xf32>, vector<256x32xf32>
          %v = vector.transpose %v0, [1, 0] :
              vector<256x32xf32> to vector<32x256xf32>
    }}}

    // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into
    // vector<128xf32>. The underlying implementation will require a 1-D vector
    // broadcast:
    affine.for %i0 = 0 to %0 {
      affine.for %i1 = 0 to %1 {
        %3 = vector.transfer_read %A[%i0, %i1]
             {permutation_map: (d0, d1) -> (0)} :
             memref<?x?xf32>, vector<128xf32>
      }
    }

    // Read from a memref with vector element type.
    %4 = vector.transfer_read %arg1[%c3, %c3], %vf0
      {permutation_map = (d0, d1)->(d0, d1)}
        : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>

    // Read from a tensor with vector element type.
    %4 = vector.transfer_read %arg1[%c3, %c3], %vf0
      {permutation_map = (d0, d1)->(d0, d1)}
        : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>

    // Special encoding for 0-d transfer with 0-d tensor/memref, vector shape
    // {1} and permutation_map () -> (0).
    %0 = vector.transfer_read %arg0[], %f0 {permutation_map = affine_map<()->(0)>} :
      tensor<f32>, vector<1xf32>
    ```
  }];

  let builders = [
    /// 1. Builder that sets padding to zero and an empty mask (variant with attrs).
    OpBuilder<(ins "VectorType":$vectorType,
                   "Value":$source,
                   "ValueRange":$indices,
                   "AffineMapAttr":$permutationMapAttr,
                   "ArrayAttr":$inBoundsAttr)>,
    /// 2. Builder that sets padding to zero and an empty mask (variant without attrs).
    OpBuilder<(ins "VectorType":$vectorType,
                   "Value":$source,
                   "ValueRange":$indices,
                   "AffineMap":$permutationMap,
                   CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>,
    /// 3. Builder that sets permutation map to 'getMinorIdentityMap'.
    OpBuilder<(ins "VectorType":$vectorType,
                   "Value":$source,
                   "ValueRange":$indices,
                   "Value":$padding,
                   CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>,
    /// 4. Builder that sets padding to zero and permutation map to
    /// 'getMinorIdentityMap'.
    OpBuilder<(ins "VectorType":$vectorType,
                   "Value":$source,
                   "ValueRange":$indices,
                   CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>,
  ];

  let extraClassDeclaration = [{
    // MaskableOpInterface methods.
    bool supportsPassthru() { return true; }

    // A read has no destination-style init operands; expose an empty range
    // for DestinationStyleOpInterface.
    MutableOperandRange getDpsInitsMutable() {
      return MutableOperandRange(getOperation(), /*start=*/0, /*length=*/0);
    }
  }];

  let hasCanonicalizer = 1;
  let hasCustomAssemblyFormat = 1;
  let hasFolder = 1;
  let hasVerifier = 1;
}
1501
// TODO: Tighten semantics so that masks and inbounds can't be used
// simultaneously within the same transfer op.
def Vector_TransferWriteOp :
  Vector_Op<"transfer_write", [
      DeclareOpInterfaceMethods<VectorTransferOpInterface>,
      DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>,
      DeclareOpInterfaceMethods<MaskableOpInterface>,
      DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
      DeclareOpInterfaceMethods<ConditionallySpeculatable>,
      AttrSizedOperandSegments,
      DestinationStyleOpInterface
  ]>,
    Arguments<(ins AnyVectorOfAnyRank:$vector,
                   AnyShaped:$source,
                   Variadic<Index>:$indices,
                   AffineMapAttr:$permutation_map,
                   Optional<VectorOfNonZeroRankOf<[I1]>>:$mask,
                   BoolArrayAttr:$in_bounds)>,
    Results<(outs Optional<AnyRankedTensor>:$result)> {

  let summary = "The vector.transfer_write op writes a supervector to memory.";

  let description = [{
    The `vector.transfer_write` op performs a write from a
    [vector](../LangRef.md#vector-type), supplied as its first operand, into a
    slice within a [MemRef](../LangRef.md#memref-type) or a Ranked
    [Tensor](../LangRef.md#tensor-type) of the same base elemental type,
    supplied as its second operand.

    A vector memref/tensor operand must have its vector element type match a
    suffix (shape and element type) of the vector (e.g. memref<3x2x6x4x3xf32>,
    vector<1x1x4x3xf32>). If the operand is a tensor, the operation returns a
    new tensor of the same type.

    The slice is further defined by a full-rank index within the MemRef/Tensor,
    supplied as the operands `[2 .. 2 + rank(memref/tensor))` that defines the
    starting point of the transfer (e.g. `%A[%i0, %i1, %i2, %i3]`).

    The permutation_map [attribute](../LangRef.md#attributes) is an
    [affine-map](Affine.md#affine-maps) which specifies the transposition on the
    slice to match the vector shape. The permutation map may be implicit and
    omitted from parsing and printing if it is the canonical minor identity map
    (i.e. if it does not permute any dimension). In contrast to `transfer_read`,
    write ops cannot have broadcast dimensions.

    The size of the slice is specified by the size of the vector.

    An optional SSA value `mask` may be specified to mask out elements written
    to the MemRef/Tensor. The `mask` type is an `i1` vector with a shape that
    matches how elements are written into the MemRef/Tensor, *after* applying
    any permutation. Elements whose corresponding mask element is `0` are
    masked out.

    For every vector dimension, the boolean array attribute `in_bounds`
    specifies if the transfer is guaranteed to be within the source bounds. If
    set to "false", accesses (including the starting point) may run
    out-of-bounds along the respective vector dimension as the index increases.
    Non-vector dimensions *must* always be in-bounds. The `in_bounds` array
    length has to be equal to the vector rank. This attribute has a default
    value: `false` (i.e. "out-of-bounds"). When skipped in the textual IR, the
    default value is assumed. Similarly, the OP printer will omit this
    attribute when all dimensions are out-of-bounds (i.e. the default value is
    used).

    A `vector.transfer_write` can be lowered to a simple store if all
    dimensions are specified to be within bounds and no `mask` was specified.

    This operation is called 'write' by opposition to 'store' because the
    super-vector granularity is generally not representable with a single
    hardware register. A `vector.transfer_write` is thus a
    mid-level abstraction that supports super-vectorization with non-effecting
    padding for full-tile-only code. It is the responsibility of
    `vector.transfer_write`'s implementation to ensure the memory writes are
    valid. Different lowerings may be pertinent depending on the hardware
    support.

    Example:

    ```mlir
    // write vector<16x32x64xf32> into the slice
    //   `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
    for %i0 = 0 to %0 {
      affine.for %i1 = 0 to %1 step 32 {
        affine.for %i2 = 0 to %2 step 64 {
          affine.for %i3 = 0 to %3 step 16 {
            %val = `ssa-value` : vector<16x32x64xf32>
            vector.transfer_write %val, %A[%i0, %i1, %i2, %i3]
              {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} :
              vector<16x32x64xf32>, memref<?x?x?x?xf32>
    }}}}

    // or equivalently (rewrite with vector.transpose)
    for %i0 = 0 to %0 {
      affine.for %i1 = 0 to %1 step 32 {
        affine.for %i2 = 0 to %2 step 64 {
          affine.for %i3 = 0 to %3 step 16 {
            %val = `ssa-value` : vector<16x32x64xf32>
            %valt = vector.transpose %val, [1, 2, 0] :
                  vector<16x32x64xf32> -> vector<32x64x16xf32>
            vector.transfer_write %valt, %A[%i0, %i1, %i2, %i3]
              {permutation_map: (d0, d1, d2, d3) -> (d1, d2, d3)} :
              vector<32x64x16xf32>, memref<?x?x?x?xf32>
    }}}}

    // write to a memref with vector element type.
    vector.transfer_write %4, %arg1[%c3, %c3]
      {permutation_map = (d0, d1)->(d0, d1)}
        : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>

    // return a tensor where the vector is inserted into the source tensor.
    %5 = vector.transfer_write %4, %arg1[%c3, %c3]
      {permutation_map = (d0, d1)->(d0, d1)}
        : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>

    // Special encoding for 0-d transfer with 0-d tensor/memref, vector shape
    // {1} and permutation_map () -> (0).
    %1 = vector.transfer_write %0, %arg0[] {permutation_map = affine_map<()->(0)>} :
      vector<1xf32>, tensor<f32>
    ```
  }];

  let builders = [
    /// 1. Builder with type inference.
    OpBuilder<(ins "Value":$vector,
                   "Value":$dest,
                   "ValueRange":$indices,
                   "AffineMapAttr":$permutationMapAttr,
                   "Value":$mask,
                   "ArrayAttr":$inBoundsAttr)>,
    /// 2. Builder with type inference that sets an empty mask (variant with attrs).
    OpBuilder<(ins "Value":$vector,
                   "Value":$dest,
                   "ValueRange":$indices,
                   "AffineMapAttr":$permutationMapAttr,
                   "ArrayAttr":$inBoundsAttr)>,
    /// 3. Builder with type inference that sets an empty mask (variant without attrs).
    OpBuilder<(ins "Value":$vector,
                   "Value":$dest,
                   "ValueRange":$indices,
                   "AffineMap":$permutationMap,
                   CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>,
    /// 4. Builder with type inference that sets an empty mask and sets permutation
    /// map to 'getMinorIdentityMap'.
    OpBuilder<(ins "Value":$vector,
                   "Value":$dest,
                   "ValueRange":$indices,
                   CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>,
  ];

  let extraClassDeclaration = [{
    /// This method is added to maintain uniformity with load/store
    ///  ops of other dialects.
    Value getValue() { return getVector(); }

    MutableOperandRange getDpsInitsMutable() { return getSourceMutable(); }
  }];

  // Folder, canonicalization patterns, the custom parser/printer and the
  // verifier are implemented in the dialect's C++ sources.
  let hasFolder = 1;
  let hasCanonicalizer = 1;
  let hasCustomAssemblyFormat = 1;
  let hasVerifier = 1;
}
1664
def Vector_LoadOp : Vector_Op<"load"> {
  let summary = "reads an n-D slice of memory into an n-D vector";
  let description = [{
    The 'vector.load' operation reads an n-D slice of memory into an n-D
    vector. It takes a 'base' memref, an index for each memref dimension and a
    result vector type as arguments. It returns a value of the result vector
    type. The 'base' memref and indices determine the start memory address from
    which to read. Each index provides an offset for each memref dimension
    based on the element type of the memref. The shape of the result vector
    type determines the shape of the slice read from the start memory address.
    The elements along each dimension of the slice are strided by the memref
    strides. When loading more than 1 element, only unit strides are allowed
    along the most minor memref dimension. These constraints guarantee that
    elements read along the first dimension of the slice are contiguous in
    memory.

    The memref element type can be a scalar or a vector type. If the memref
    element type is a scalar, it should match the element type of the result
    vector. If the memref element type is vector, it should match the result
    vector type.

    Example: 0-D vector load on a scalar memref.
    ```mlir
    %result = vector.load %base[%i, %j] : memref<100x100xf32>, vector<f32>
    ```

    Example: 1-D vector load on a scalar memref.
    ```mlir
    %result = vector.load %base[%i, %j] : memref<100x100xf32>, vector<8xf32>
    ```

    Example: 1-D vector load on a vector memref.
    ```mlir
    %result = vector.load %memref[%i, %j] : memref<200x100xvector<8xf32>>, vector<8xf32>
    ```

    Example: 2-D vector load on a scalar memref.
    ```mlir
    %result = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<4x8xf32>
    ```

    Example: 2-D vector load on a vector memref.
    ```mlir
    %result = vector.load %memref[%i, %j] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32>
    ```

    Representation-wise, the 'vector.load' operation permits out-of-bounds
    reads. Support and implementation of out-of-bounds vector loads is
    target-specific. No assumptions should be made on the value of elements
    loaded out of bounds. Not all targets may support out-of-bounds vector
    loads.

    Example: Potential out-of-bounds vector load.
    ```mlir
    %result = vector.load %memref[%index] : memref<?xf32>, vector<8xf32>
    ```

    Example: Explicit out-of-bounds vector load.
    ```mlir
    %result = vector.load %memref[%c0] : memref<7xf32>, vector<8xf32>
    ```
  }];

  // `base` is declared with a MemRead side effect; `nontemporal` defaults to
  // false (NOTE(review): presumably forwarded as a non-temporal hint during
  // lowering — confirm against the LLVM conversion).
  let arguments = (ins Arg<AnyMemRef, "the reference to load from",
      [MemRead]>:$base,
      Variadic<Index>:$indices,
      DefaultValuedOptionalAttr<BoolAttr, "false">:$nontemporal);
  let results = (outs AnyVectorOfAnyRank:$result);

  // Typed convenience accessors for the base memref and the result vector.
  let extraClassDeclaration = [{
    MemRefType getMemRefType() {
      return ::llvm::cast<MemRefType>(getBase().getType());
    }

    VectorType getVectorType() {
      return ::llvm::cast<VectorType>(getResult().getType());
    }
  }];

  // Folder and verifier are implemented in the dialect's C++ sources.
  let hasFolder = 1;
  let hasVerifier = 1;

  let assemblyFormat =
      "$base `[` $indices `]` attr-dict `:` type($base) `,` type($result)";
}
1750
def Vector_StoreOp : Vector_Op<"store"> {
  let summary = "writes an n-D vector to an n-D slice of memory";
  let description = [{
    The 'vector.store' operation writes an n-D vector to an n-D slice of memory.
    It takes the vector value to be stored, a 'base' memref and an index for
    each memref dimension. The 'base' memref and indices determine the start
    memory address from which to write. Each index provides an offset for each
    memref dimension based on the element type of the memref. The shape of the
    vector value to store determines the shape of the slice written from the
    start memory address. The elements along each dimension of the slice are
    strided by the memref strides. When storing more than 1 element, only unit
    strides are allowed along the most minor memref dimension. These constraints
    guarantee that elements written along the first dimension of the slice are
    contiguous in memory.

    The memref element type can be a scalar or a vector type. If the memref
    element type is a scalar, it should match the element type of the value
    to store. If the memref element type is vector, it should match the type
    of the value to store.

    Example: 0-D vector store on a scalar memref.
    ```mlir
    vector.store %valueToStore, %memref[%i, %j] : memref<200x100xf32>, vector<f32>
    ```

    Example: 1-D vector store on a scalar memref.
    ```mlir
    vector.store %valueToStore, %memref[%i, %j] : memref<200x100xf32>, vector<8xf32>
    ```

    Example: 1-D vector store on a vector memref.
    ```mlir
    vector.store %valueToStore, %memref[%i, %j] : memref<200x100xvector<8xf32>>, vector<8xf32>
    ```

    Example: 2-D vector store on a scalar memref.
    ```mlir
    vector.store %valueToStore, %memref[%i, %j] : memref<200x100xf32>, vector<4x8xf32>
    ```

    Example: 2-D vector store on a vector memref.
    ```mlir
    vector.store %valueToStore, %memref[%i, %j] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32>
    ```

    Representation-wise, the 'vector.store' operation permits out-of-bounds
    writes. Support and implementation of out-of-bounds vector stores are
    target-specific. No assumptions should be made on the memory written out of
    bounds. Not all targets may support out-of-bounds vector stores.

    Example: Potential out-of-bounds vector store.
    ```mlir
    vector.store %valueToStore, %memref[%index] : memref<?xf32>, vector<8xf32>
    ```

    Example: Explicit out-of-bounds vector store.
    ```mlir
    vector.store %valueToStore, %memref[%c0] : memref<7xf32>, vector<8xf32>
    ```
  }];

  // `base` is declared with a MemWrite side effect; `nontemporal` defaults to
  // false (NOTE(review): presumably forwarded as a non-temporal hint during
  // lowering — confirm against the LLVM conversion).
  let arguments = (ins
      AnyVectorOfAnyRank:$valueToStore,
      Arg<AnyMemRef, "the reference to store to",
      [MemWrite]>:$base,
      Variadic<Index>:$indices,
      DefaultValuedOptionalAttr<BoolAttr, "false">:$nontemporal
  );

  // Typed convenience accessors for the base memref and the stored vector.
  let extraClassDeclaration = [{
    MemRefType getMemRefType() {
      return ::llvm::cast<MemRefType>(getBase().getType());
    }

    VectorType getVectorType() {
      return ::llvm::cast<VectorType>(getValueToStore().getType());
    }
  }];

  // Folder and verifier are implemented in the dialect's C++ sources.
  let hasFolder = 1;
  let hasVerifier = 1;

  let assemblyFormat = "$valueToStore `,` $base `[` $indices `]` attr-dict "
                       "`:` type($base) `,` type($valueToStore)";
}
1836
def Vector_MaskedLoadOp :
  Vector_Op<"maskedload">,
    Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base,
               Variadic<Index>:$indices,
               VectorOfNonZeroRankOf<[I1]>:$mask,
               AnyVectorOfNonZeroRank:$pass_thru)>,
    Results<(outs AnyVectorOfNonZeroRank:$result)> {

  let summary = "loads elements from memory into a vector as defined by a mask vector";

  let description = [{
    The masked load reads elements from memory into a vector as defined
    by a base with indices and a mask vector. When the mask is set, the
    element is read from memory. Otherwise, the corresponding element is taken
    from a pass-through vector. Informally the semantics are:
    ```
    result[0] := if mask[0] then base[i + 0] else pass_thru[0]
    result[1] := if mask[1] then base[i + 1] else pass_thru[1]
    etc.
    ```

    If a mask bit is set and the corresponding index is out-of-bounds for the
    given base, the behavior is undefined. If a mask bit is not set, the value
    comes from the pass-through vector regardless of the index, and the index is
    allowed to be out-of-bounds.

    The masked load can be used directly where applicable, or can be used
    during progressively lowering to bring other memory operations closer to
    hardware ISA support for a masked load. The semantics of the operation
    closely correspond to those of the `llvm.masked.load`
    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-load-intrinsics).

    Examples:

    ```mlir
    %0 = vector.maskedload %base[%i], %mask, %pass_thru
       : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32>

    %1 = vector.maskedload %base[%i, %j], %mask, %pass_thru
       : memref<?x?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
    ```
  }];
  // Typed convenience accessors for the operands and the result.
  let extraClassDeclaration = [{
    MemRefType getMemRefType() {
      return ::llvm::cast<MemRefType>(getBase().getType());
    }
    VectorType getMaskVectorType() {
      return ::llvm::cast<VectorType>(getMask().getType());
    }
    VectorType getPassThruVectorType() {
      return ::llvm::cast<VectorType>(getPassThru().getType());
    }
    VectorType getVectorType() {
      return ::llvm::cast<VectorType>(getResult().getType());
    }
  }];
  let assemblyFormat = "$base `[` $indices `]` `,` $mask `,` $pass_thru attr-dict `:` "
    "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)";
  // Canonicalization patterns, folder, and verifier are implemented in the
  // dialect's C++ sources.
  let hasCanonicalizer = 1;
  let hasFolder = 1;
  let hasVerifier = 1;
}
1899
def Vector_MaskedStoreOp :
  Vector_Op<"maskedstore">,
    Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base,
               Variadic<Index>:$indices,
               VectorOfNonZeroRankOf<[I1]>:$mask,
               AnyVectorOfNonZeroRank:$valueToStore)> {

  let summary = "stores elements from a vector into memory as defined by a mask vector";

  let description = [{
    The masked store operation writes elements from a vector into memory
    as defined by a base with indices and a mask vector. When the mask is
    set, the corresponding element from the vector is written to memory. Otherwise,
    no action is taken for the element. Informally the semantics are:
    ```
    if (mask[0]) base[i+0] = value[0]
    if (mask[1]) base[i+1] = value[1]
    etc.
    ```

    If a mask bit is set and the corresponding index is out-of-bounds for the
    given base, the behavior is undefined. If a mask bit is not set, no value
    is stored regardless of the index, and the index is allowed to be
    out-of-bounds.

    The masked store can be used directly where applicable, or can be used
    during progressively lowering to bring other memory operations closer to
    hardware ISA support for a masked store. The semantics of the operation
    closely correspond to those of the `llvm.masked.store`
    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-store-intrinsics).

    Examples:

    ```mlir
    vector.maskedstore %base[%i], %mask, %value
      : memref<?xf32>, vector<8xi1>, vector<8xf32>

    vector.maskedstore %base[%i, %j], %mask, %value
      : memref<?x?xf32>, vector<16xi1>, vector<16xf32>
    ```
  }];
  // Typed convenience accessors for the operands.
  let extraClassDeclaration = [{
    MemRefType getMemRefType() {
      return ::llvm::cast<MemRefType>(getBase().getType());
    }
    VectorType getMaskVectorType() {
      return ::llvm::cast<VectorType>(getMask().getType());
    }
    VectorType getVectorType() {
      return ::llvm::cast<VectorType>(getValueToStore().getType());
    }
  }];
  let assemblyFormat =
      "$base `[` $indices `]` `,` $mask `,` $valueToStore "
      "attr-dict `:` type($base) `,` type($mask) `,` type($valueToStore)";
  // Canonicalization patterns, folder, and verifier are implemented in the
  // dialect's C++ sources.
  let hasCanonicalizer = 1;
  let hasFolder = 1;
  let hasVerifier = 1;
}
1959
def Vector_GatherOp :
  Vector_Op<"gather", [
    DeclareOpInterfaceMethods<MaskableOpInterface>,
    DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
  ]>,
    Arguments<(ins Arg<AnyShaped, "", [MemRead]>:$base,
               Variadic<Index>:$indices,
               VectorOfNonZeroRankOf<[AnyInteger, Index]>:$index_vec,
               VectorOfNonZeroRankOf<[I1]>:$mask,
               AnyVectorOfNonZeroRank:$pass_thru)>,
    Results<(outs AnyVectorOfNonZeroRank:$result)> {

  let summary = [{
    gathers elements from memory or ranked tensor into a vector as defined by an
    index vector and a mask vector
  }];

  let description = [{
    The gather operation returns an n-D vector whose elements are either loaded
    from memory or ranked tensor, or taken from a pass-through vector, depending
    on the values of an n-D mask vector.
    If a mask bit is set, the corresponding result element is defined by the base
    with indices and the n-D index vector (each index is a 1-D offset on the base).
    Otherwise, the corresponding element is taken from the n-D pass-through vector.
    Informally the semantics are:
    ```
    result[0] := if mask[0] then base[index[0]] else pass_thru[0]
    result[1] := if mask[1] then base[index[1]] else pass_thru[1]
    etc.
    ```

    If a mask bit is set and the corresponding index is out-of-bounds for the
    given base, the behavior is undefined. If a mask bit is not set, the value
    comes from the pass-through vector regardless of the index, and the index is
    allowed to be out-of-bounds.

    The gather operation can be used directly where applicable, or can be used
    during progressively lowering to bring other memory operations closer to
    hardware ISA support for a gather.

    Examples:

    ```mlir
    %0 = vector.gather %base[%c0][%v], %mask, %pass_thru
       : memref<?xf32>, vector<2x16xi32>, vector<2x16xi1>, vector<2x16xf32> into vector<2x16xf32>

    %1 = vector.gather %base[%i, %j][%v], %mask, %pass_thru
       : memref<16x16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
    ```
  }];

  // Typed convenience accessors for the operands and the result. Unlike the
  // masked-load accessors, `base` may be a memref or a ranked tensor, hence
  // the ShapedType return.
  let extraClassDeclaration = [{
    ShapedType getBaseType() { return getBase().getType(); }
    VectorType getIndexVectorType() { return getIndexVec().getType(); }
    VectorType getMaskVectorType() { return getMask().getType(); }
    VectorType getPassThruVectorType() { return getPassThru().getType(); }
    VectorType getVectorType() { return getResult().getType(); }
  }];

  let assemblyFormat =
    "$base `[` $indices `]` `[` $index_vec `]` `,` "
    "$mask `,` $pass_thru attr-dict `:` type($base) `,` "
    "type($index_vec) `,` type($mask) `,` type($pass_thru) "
    "`into` type($result)";
  // Canonicalization patterns and verifier are implemented in the dialect's
  // C++ sources.
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
}
2027
def Vector_ScatterOp :
  Vector_Op<"scatter">,
    Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base,
               Variadic<Index>:$indices,
               VectorOfRankAndType<[1], [AnyInteger, Index]>:$index_vec,
               VectorOfRankAndType<[1], [I1]>:$mask,
               VectorOfRank<[1]>:$valueToStore)> {

  let summary = [{
    scatters elements from a vector into memory as defined by an index vector
    and a mask vector
  }];

  let description = [{
    The scatter operation stores elements from a 1-D vector into memory as
    defined by a base with indices and an additional 1-D index vector, but
    only if the corresponding bit in a 1-D mask vector is set. Otherwise, no
    action is taken for that element. Informally the semantics are:
    ```
    if (mask[0]) base[index[0]] = value[0]
    if (mask[1]) base[index[1]] = value[1]
    etc.
    ```

    If a mask bit is set and the corresponding index is out-of-bounds for the
    given base, the behavior is undefined. If a mask bit is not set, no value
    is stored regardless of the index, and the index is allowed to be
    out-of-bounds.

    If the index vector contains two or more duplicate indices, the behavior is
    undefined. Underlying implementation may enforce strict sequential
    semantics.
    TODO: always enforce strict sequential semantics?

    The scatter operation can be used directly where applicable, or can be used
    during progressively lowering to bring other memory operations closer to
    hardware ISA support for a scatter. The semantics of the operation closely
    correspond to those of the `llvm.masked.scatter`
    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-scatter-intrinsics).

    Examples:

    ```mlir
    vector.scatter %base[%c0][%v], %mask, %value
        : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>

    vector.scatter %base[%i, %j][%v], %mask, %value
        : memref<16x16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
    ```
  }];

  // Typed convenience accessors for the operands.
  let extraClassDeclaration = [{
    MemRefType getMemRefType() { return getBase().getType(); }
    VectorType getIndexVectorType() { return getIndexVec().getType(); }
    VectorType getMaskVectorType() { return getMask().getType(); }
    VectorType getVectorType() { return getValueToStore().getType(); }
  }];

  let assemblyFormat =
      "$base `[` $indices `]` `[` $index_vec `]` `,` "
      "$mask `,` $valueToStore attr-dict `:` type($base) `,` "
      "type($index_vec) `,` type($mask) `,` type($valueToStore)";
  // Canonicalization patterns and verifier are implemented in the dialect's
  // C++ sources.
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
}
2093
def Vector_ExpandLoadOp :
  Vector_Op<"expandload">,
    Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base,
               Variadic<Index>:$indices,
               FixedVectorOfNonZeroRankOf<[I1]>:$mask,
               AnyVectorOfNonZeroRank:$pass_thru)>,
    Results<(outs AnyVectorOfNonZeroRank:$result)> {

  let summary = "reads elements from memory and spreads them into a vector as defined by a mask";

  let description = [{
    The expand load reads elements from memory into a vector as defined by a
    base with indices and a mask vector. Expansion only applies to the innermost
    dimension. When the mask is set, the next element is read from memory.
    Otherwise, the corresponding element is taken from a pass-through vector.
    Informally the semantics are:

    ```
    index = i
    result[0] := if mask[0] then base[index++] else pass_thru[0]
    result[1] := if mask[1] then base[index++] else pass_thru[1]
    etc.
    ```

    Note that the index increment is done conditionally.

    If a mask bit is set and the corresponding index is out-of-bounds for the
    given base, the behavior is undefined. If a mask bit is not set, the value
    comes from the pass-through vector regardless of the index, and the index is
    allowed to be out-of-bounds.

    The expand load can be used directly where applicable, or can be used
    during progressively lowering to bring other memory operations closer to
    hardware ISA support for an expand. The semantics of the operation closely
    correspond to those of the `llvm.masked.expandload`
    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-expandload-intrinsics).

    Note, at the moment this Op is only available for fixed-width vectors.

    Examples:

    ```mlir
    %0 = vector.expandload %base[%i], %mask, %pass_thru
       : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32>

    %1 = vector.expandload %base[%i, %j], %mask, %pass_thru
       : memref<?x?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
    ```
  }];
  // Typed convenience accessors for the operands and the result.
  let extraClassDeclaration = [{
    MemRefType getMemRefType() {
      return ::llvm::cast<MemRefType>(getBase().getType());
    }
    VectorType getMaskVectorType() {
      return ::llvm::cast<VectorType>(getMask().getType());
    }
    VectorType getPassThruVectorType() {
      return ::llvm::cast<VectorType>(getPassThru().getType());
    }
    VectorType getVectorType() {
      return ::llvm::cast<VectorType>(getResult().getType());
    }
  }];
  let assemblyFormat = "$base `[` $indices `]` `,` $mask `,` $pass_thru attr-dict `:` "
    "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)";
  // Canonicalization patterns and verifier are implemented in the dialect's
  // C++ sources.
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
}
2162
def Vector_CompressStoreOp :
  Vector_Op<"compressstore">,
    Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base,
               Variadic<Index>:$indices,
               FixedVectorOfNonZeroRankOf<[I1]>:$mask,
               AnyVectorOfNonZeroRank:$valueToStore)> {

  let summary = "writes elements selectively from a vector as defined by a mask";

  let description = [{
    The compress store operation writes elements from a vector into memory as
    defined by a base with indices and a mask vector. Compression only applies
    to the innermost dimension. When the mask is set, the corresponding element
    from the vector is written next to memory.  Otherwise, no action is taken
    for the element. Informally the semantics are:

    ```
    index = i
    if (mask[0]) base[index++] = value[0]
    if (mask[1]) base[index++] = value[1]
    etc.
    ```

    Note that the index increment is done conditionally.

    If a mask bit is set and the corresponding index is out-of-bounds for the
    given base, the behavior is undefined. If a mask bit is not set, no value
    is stored regardless of the index, and the index is allowed to be
    out-of-bounds.

    The compress store can be used directly where applicable, or can be used
    during progressively lowering to bring other memory operations closer to
    hardware ISA support for a compress. The semantics of the operation closely
    correspond to those of the `llvm.masked.compressstore`
    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-compressstore-intrinsics).

    Note, at the moment this Op is only available for fixed-width vectors.

    Examples:

    ```mlir
    vector.compressstore %base[%i], %mask, %value
      : memref<?xf32>, vector<8xi1>, vector<8xf32>

    vector.compressstore %base[%i, %j], %mask, %value
      : memref<?x?xf32>, vector<16xi1>, vector<16xf32>
    ```
  }];
  // Typed convenience accessors for the operands.
  let extraClassDeclaration = [{
    MemRefType getMemRefType() {
      return ::llvm::cast<MemRefType>(getBase().getType());
    }
    VectorType getMaskVectorType() {
      return ::llvm::cast<VectorType>(getMask().getType());
    }
    VectorType getVectorType() {
      return ::llvm::cast<VectorType>(getValueToStore().getType());
    }
  }];
  let assemblyFormat =
      "$base `[` $indices `]` `,` $mask `,` $valueToStore attr-dict `:` "
      "type($base) `,` type($mask) `,` type($valueToStore)";
  // Canonicalization patterns and verifier are implemented in the dialect's
  // C++ sources.
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
}
2228
def Vector_ShapeCastOp :
  Vector_Op<"shape_cast", [Pure,
    DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>
  ]>,
    Arguments<(ins AnyVectorOfAnyRank:$source)>,
    Results<(outs AnyVectorOfAnyRank:$result)> {
  let summary = "shape_cast casts between vector shapes";
  let description = [{
    The shape_cast operation casts between an n-D source vector shape and
    a k-D result vector shape (the element type remains the same).

    If reducing rank (n > k), result dimension sizes must be a product
    of contiguous source dimension sizes.
    If expanding rank (n < k), source dimensions must factor into a
    contiguous sequence of destination dimension sizes.
    Each source dim is expanded (or contiguous sequence of source dims combined)
    in source dimension list order (i.e. 0 <= i < n), to produce a contiguous
    sequence of result dims (or a single result dim), in result dimension list
    order (i.e. 0 <= j < k). The product of all source dimension sizes and all
    result dimension sizes must match.

    It is currently assumed that this operation does not require moving data,
    and that it will be folded away before lowering vector operations.

    There is an exception to the folding expectation when targeting
    llvm.intr.matrix operations. We need a type conversion back and forth from a
    2-D MLIR vector to a 1-D flattened LLVM vector. A `shape_cast` lowering to
    LLVM is supported in that particular case, for now.

    Example:

    ```mlir
    // Example casting to a lower vector rank.
    %1 = vector.shape_cast %0 : vector<5x1x4x3xf32> to vector<20x3xf32>

    // Example casting to a higher vector rank.
    %3 = vector.shape_cast %2 : vector<10x12x8xf32> to vector<5x2x3x4x8xf32>

    ```
  }];
  let extraClassDeclaration = [{
    /// Return the type of the input vector ($source).
    VectorType getSourceVectorType() {
      return ::llvm::cast<VectorType>(getSource().getType());
    }
    /// Return the type of the output vector ($result).
    VectorType getResultVectorType() {
      return ::llvm::cast<VectorType>(getResult().getType());
    }
  }];
  let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)";
  let hasFolder = 1;
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
}
2282
def Vector_BitCastOp :
  Vector_Op<"bitcast", [Pure, AllRanksMatch<["source", "result"]>]>,
    Arguments<(ins AnyVectorOfAnyRank:$source)>,
    Results<(outs AnyVectorOfAnyRank:$result)>{
  let summary = "bitcast casts between vectors";
  let description = [{
    The bitcast operation casts between vectors of the same rank, the minor 1-D
    vector size is casted to a vector with a different element type but same
    bitwidth. In case of 0-D vectors, the bitwidth of element types must be
    equal.

    Example:

    ```mlir
    // Example casting to a smaller element type.
    %1 = vector.bitcast %0 : vector<5x1x4x3xf32> to vector<5x1x4x6xi16>

    // Example casting to a bigger element type.
    %3 = vector.bitcast %2 : vector<10x12x8xi8> to vector<10x12x2xi32>

    // Example casting to an element type of the same size.
    %5 = vector.bitcast %4 : vector<5x1x4x3xf32> to vector<5x1x4x3xi32>

    // Example casting of 0-D vectors.
    %7 = vector.bitcast %6 : vector<f32> to vector<i32>
    ```
  }];
  let extraClassDeclaration = [{
    /// Return the type of the input vector ($source).
    VectorType getSourceVectorType() {
      return ::llvm::cast<VectorType>(getSource().getType());
    }
    /// Return the type of the output vector ($result).
    VectorType getResultVectorType() {
      return ::llvm::cast<VectorType>(getResult().getType());
    }
  }];
  let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)";
  let hasFolder = 1;
  let hasVerifier = 1;
}
2322
def Vector_TypeCastOp :
  Vector_Op<"type_cast", [Pure, ViewLikeOpInterface]>,
    Arguments<(ins StaticShapeMemRefOf<[AnyType]>:$memref)>,
    Results<(outs AnyMemRef:$result)> {
  let summary = "type_cast op converts a scalar memref to a vector memref";
  let description = [{
    Performs a conversion from a memref with scalar element to a memref with a
    *single* vector element, copying the shape of the memref to the vector. This
    is the minimal viable operation that is required to make
    super-vectorization operational. It can be seen as a special case of the
    `view` operation but scoped in the super-vectorization context.

    Example:

    ```mlir
    %A  = memref.alloc() : memref<5x4x3xf32>
    %VA = vector.type_cast %A : memref<5x4x3xf32> to memref<vector<5x4x3xf32>>
    ```
  }];

  /// Build the canonical memRefType with a single vector.
  /// E.g. memref<4 x 5 x vector<6 x f32>> -> memref<vector<4 x 5 x 6 x f32>>.
  let builders = [OpBuilder<(ins "Value":$source)>];

  let extraClassDeclaration = [{
    /// Return the type of the scalar-element source memref ($memref).
    MemRefType getMemRefType() {
      return ::llvm::cast<MemRefType>(getMemref().getType());
    }
    /// Return the type of the single-vector-element result memref ($result).
    MemRefType getResultMemRefType() {
      return ::llvm::cast<MemRefType>(getResult().getType());
    }
    // Implement ViewLikeOpInterface.
    Value getViewSource() { return getMemref(); }
  }];

  let assemblyFormat = [{
    $memref attr-dict `:` type($memref) `to` type($result)
  }];
  let hasVerifier = 1;
}
2363
def Vector_ConstantMaskOp :
  Vector_Op<"constant_mask", [Pure]>,
    Arguments<(ins DenseI64ArrayAttr:$mask_dim_sizes)>,
    Results<(outs VectorOfAnyRankOf<[I1]>)> {
  let summary = "creates a constant vector mask";
  let description = [{
    Creates and returns a vector mask where elements of the result vector
    are set to '0' or '1', based on whether the element indices are contained
    within a hyper-rectangular region specified by the 'mask_dim_sizes'
    array attribute argument. Each element of the 'mask_dim_sizes' array,
    specifies an exclusive upper bound [0, mask-dim-size-element-value)
    for a unique dimension in the vector result. The conjunction of the ranges
    define a hyper-rectangular region within which elements values are set to 1
    (otherwise element values are set to 0). Each value of 'mask_dim_sizes' must
    be non-negative and not greater than the size of the corresponding vector
    dimension (as opposed to vector.create_mask which allows this). Sizes that
    correspond to scalable dimensions are implicitly multiplied by vscale,
    though currently only zero (none set) or the size of the dim/vscale
    (all set) are supported.

    Example:

    ```mlir
    // create a constant vector mask of size 4x3xi1 with elements in range
    // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
    %1 = vector.constant_mask [3, 2] : vector<4x3xi1>

    print %1
                  columns
                0    1    2
              |------------
            0 | 1    1    0
      rows  1 | 1    1    0
            2 | 1    1    0
            3 | 0    0    0
    ```
  }];

  let builders = [
    // Build a constant mask of the given result type and mask kind.
    OpBuilder<(ins "VectorType":$type, "ConstantMaskKind":$kind)>
  ];

  let extraClassDeclaration = [{
    /// Return the result type of this op.
    VectorType getVectorType() {
      return cast<VectorType>(getOperation()->getResultTypes()[0]);
    }

    /// Return whether the mask is a uniform vector of `1`s.
    bool isAllOnesMask();
  }];

  let assemblyFormat = "$mask_dim_sizes attr-dict `:` type(results)";
  let hasVerifier = 1;
}
2420
def Vector_CreateMaskOp :
  Vector_Op<"create_mask", [Pure]>,
    Arguments<(ins Variadic<Index>:$operands)>,
    Results<(outs VectorOfAnyRankOf<[I1]>)> {
  let summary = "creates a vector mask";
  let description = [{
    Creates and returns a vector mask where elements of the result vector
    are set to '0' or '1', based on whether the element indices are contained
    within a hyper-rectangular region specified by the operands. Specifically,
    each operand specifies a range [0, operand-value) for a unique dimension in
    the vector result. The conjunction of the operand ranges define a
    hyper-rectangular region within which elements values are set to 1
    (otherwise element values are set to 0). If operand-value is negative, it is
    treated as if it were zero, and if it is greater than the corresponding
    dimension size, it is treated as if it were equal to the dimension size.

    Example:

    ```mlir
    // create a vector mask of size 4x3xi1 where elements in range
    // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
    %1 = vector.create_mask %c3, %c2 : vector<4x3xi1>

    print %1
                  columns
                0    1    2
              |------------
            0 | 1    1    0
      rows  1 | 1    1    0
            2 | 1    1    0
            3 | 0    0    0
    ```
  }];

  let builders = [
    // Build with mixed static/dynamic operands.
    OpBuilder<(ins "VectorType":$type, "ArrayRef<OpFoldResult>":$mixedOperands)>
  ];

  let extraClassDeclaration = [{
    /// Return the result type of this op.
    VectorType getVectorType() {
      return cast<VectorType>(getOperation()->getResultTypes()[0]);
    }
  }];

  // One index operand per result dimension, e.g. `%c3, %c2 : vector<4x3xi1>`.
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
  let assemblyFormat = "$operands attr-dict `:` type(results)";
}
2471
def Vector_MaskOp : Vector_Op<"mask", [
  SingleBlockImplicitTerminator<"vector::YieldOp">,
  DeclareOpInterfaceMethods<MaskingOpInterface>,
  RecursiveMemoryEffects, NoRegionArguments
]> {
  let summary = "Predicates a maskable vector operation";
  let description = [{
    The `vector.mask` is a `MaskingOpInterface` operation that predicates the
    execution of another operation. It takes an `i1` vector mask and an
    optional passthru vector as arguments.

    An implicitly `vector.yield`-terminated region encloses the operation to be
    masked. Values used within the region are captured from above. Only one
    *maskable* operation can be masked with a `vector.mask` operation at a time.
    An operation is *maskable* if it implements the `MaskableOpInterface`. The
    terminator yields all results of the maskable operation to the result of
    this operation.

    The vector mask argument holds a bit for each vector lane and determines
    which vector lanes should execute the maskable operation and which ones
    should not. The `vector.mask` operation returns the value produced by the
    masked execution of the nested operation, if any. The masked-off lanes in
    the result vector are taken from the corresponding lanes of the pass-thru
    argument, if provided, or left unmodified, otherwise. At this point, 0-D
    vectors are not supported by `vector.mask`. They may be supported in the
    future.

    The `vector.mask` operation does not prescribe how a maskable operation
    should be masked or how a masked operation should be lowered. Masking
    constraints and some semantic details are provided by each maskable
    operation through the `MaskableOpInterface`. Lowering of masked operations
    is implementation defined. For instance, scalarizing the masked operation
    or executing the operation for the masked-off lanes are valid lowerings as
    long as the execution of masked-off lanes does not change the observable
    behavior of the program.

    Examples:

    ```
      %0 = vector.mask %mask { vector.reduction <add>, %a : vector<8xi32> into i32 } : vector<8xi1> -> i32
    ```

    ```
      %0 = vector.mask %mask, %passthru { arith.divsi %a, %b : vector<8xi32> } : vector<8xi1> -> vector<8xi32>
    ```

    ```
      vector.mask %mask { vector.transfer_write %val, %t0[%idx] : vector<16xf32>, memref<?xf32> } : vector<16xi1>
    ```

    ```
      vector.mask %mask { vector.transfer_write %val, %t0[%idx] : vector<16xf32>, tensor<?xf32> } : vector<16xi1> -> tensor<?xf32>
    ```
  }];

  // TODO: Support multiple passthru values.
  let arguments = (ins VectorOfNonZeroRankOf<[I1]>:$mask,
                   Optional<AnyType>:$passthru);
  let results = (outs Variadic<AnyType>:$results);
  let regions = (region SizedRegion<1>:$maskRegion);

  let skipDefaultBuilders = 1;
  let builders = [
    OpBuilder<(ins "Value":$mask, "Operation *":$maskableOp,
                   CArg<"function_ref<void(OpBuilder &, Operation *)>">:$maskRegion)>,
    OpBuilder<(ins "TypeRange":$resultTypes, "Value":$mask, "Operation *":$maskableOp,
                   CArg<"function_ref<void(OpBuilder &, Operation *)>">:$maskRegion)>,
    OpBuilder<(ins "TypeRange":$resultTypes, "Value":$mask, "Value":$passthru,
                   "Operation *":$maskableOp,
                   CArg<"function_ref<void(OpBuilder &, Operation *)>">:$maskRegion)>
  ];

  let extraClassDeclaration = [{
    Block *getMaskBlock() { return &getMaskRegion().front(); }

    /// Returns true if mask op is not masking any operation.
    bool isEmpty() {
      // The block contains at most the implicit terminator when empty.
      Block *block = getMaskBlock();
      if (block->getOperations().size() > 1)
        return false;
      return true;
    }

    static void ensureTerminator(Region &region, Builder &builder,
                                 Location loc);
  }];

  let hasCanonicalizer = 1;
  let hasFolder = 1;
  let hasCustomAssemblyFormat = 1;
  let hasVerifier = 1;
}
2564
def Vector_TransposeOp :
  Vector_Op<"transpose", [Pure,
    DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>,
    PredOpTrait<"operand and result have same element type",
                 TCresVTEtIsSameAsOpBase<0, 0>>]> {
  let summary = "vector transpose operation";
  let description = [{
    Takes a n-D vector and returns the transposed n-D vector defined by
    the permutation of ranks in the n-sized integer array attribute (in case
    of 0-D vectors the array attribute must be empty).

    In the operation

    ```mlir
    %1 = vector.transpose %0, [i_1, .., i_n]
      : vector<d_1 x .. x d_n x f32>
      to vector<d_trans[0] x .. x d_trans[n-1] x f32>
    ```

    the `permutation` array [i_1, .., i_n] must be a permutation of [0, .., n-1].

    Example:

    ```mlir
    %1 = vector.transpose %0, [1, 0] : vector<2x3xf32> to vector<3x2xf32>

     [ [a, b, c],       [ [a, d],
       [d, e, f] ]  ->    [b, e],
                          [c, f] ]
    ```
  }];

  let arguments = (ins AnyVectorOfAnyRank:$vector,
                       DenseI64ArrayAttr:$permutation);
  let results = (outs AnyVectorOfAnyRank:$result);

  let builders = [
    // Build a transpose of `vector` according to `permutation`.
    OpBuilder<(ins "Value":$vector, "ArrayRef<int64_t>":$permutation)>
  ];
  let extraClassDeclaration = [{
    /// Return the type of the input vector ($vector).
    VectorType getSourceVectorType() {
      return ::llvm::cast<VectorType>(getVector().getType());
    }
    /// Return the type of the transposed vector ($result).
    VectorType getResultVectorType() {
      return ::llvm::cast<VectorType>(getResult().getType());
    }
  }];
  let assemblyFormat = [{
    $vector `,` $permutation attr-dict `:` type($vector) `to` type($result)
  }];
  let hasCanonicalizer = 1;
  let hasFolder = 1;
  let hasVerifier = 1;
}
2619
def Vector_PrintOp :
  Vector_Op<"print", [
    MemoryEffects<[MemWrite]>,
    PredOpTrait<
      "`source` or `punctuation` are not set when printing strings",
      CPred<"!getStringLiteral() || (!getSource() && getPunctuation() == PrintPunctuation::NewLine)">
    >,
  ]>,
  Arguments<(ins Optional<Type<Or<[
    AnyVectorOfAnyRank.predicate,
    AnyInteger.predicate, Index.predicate, AnyFloat.predicate
  ]>>>:$source, DefaultValuedAttr<Vector_PrintPunctuation,
                      "::mlir::vector::PrintPunctuation::NewLine">:$punctuation,
                OptionalAttr<Builtin_StringAttr>:$stringLiteral)
  > {
  let summary = "print operation (for testing and debugging)";
  let description = [{
    Prints the source vector (or scalar) to stdout in a human-readable format
    (for testing and debugging). No return value.

    Example:

    ```mlir
    %v = arith.constant dense<0.0> : vector<4xf32>
    vector.print %v : vector<4xf32>
    ```

    When lowered to LLVM, the vector print is decomposed into elementary
    printing method calls that at runtime will yield:

    ```
    ( 0.0, 0.0, 0.0, 0.0 )
    ```

    This is printed to stdout via a small runtime support library, which only
    needs to provide a few printing methods (single value for all data
    types, opening/closing bracket, comma, newline).

    By default `vector.print` adds a newline after the vector, but this can be
    controlled by the `punctuation` attribute. For example, to print a comma
    after instead do:

    ```mlir
    vector.print %v : vector<4xf32> punctuation <comma>
    ```

    Note that it is possible to use the punctuation attribute alone. The
    following will print a single newline:

    ```mlir
    vector.print punctuation <newline>
    ```

    Additionally, to aid with debugging and testing `vector.print` can also
    print constant strings:

    ```mlir
    vector.print str "Hello, World!"
    ```
  }];
  let extraClassDeclaration = [{
    /// Return the type of the value being printed.
    /// NOTE(review): $source is an optional operand; this presumably must only
    /// be called when a source is present — confirm against callers.
    Type getPrintType() {
      return getSource().getType();
    }
  }];
  let builders = [
    // Print only punctuation (no source value).
    OpBuilder<(ins "PrintPunctuation":$punctuation), [{
      build($_builder, $_state, {}, punctuation, {});
    }]>,
    // Print `source` followed by a newline.
    OpBuilder<(ins "::mlir::Value":$source), [{
      build($_builder, $_state, source, PrintPunctuation::NewLine);
    }]>,
    // Print `source` followed by the given punctuation.
    OpBuilder<(ins "::mlir::Value":$source, "PrintPunctuation":$punctuation), [{
      build($_builder, $_state, source, punctuation, {});
    }]>,
    // Print a constant string literal.
    OpBuilder<(ins "::llvm::StringRef":$string), [{
      build($_builder, $_state, {}, PrintPunctuation::NewLine, $_builder.getStringAttr(string));
    }]>,
  ];

  let assemblyFormat = [{
      ($source^ `:` type($source))?
        oilist(
            `str` $stringLiteral
          | `punctuation` $punctuation)
        attr-dict
    }];
}
2708
2709//===----------------------------------------------------------------------===//
2710// Ops used for supporting progressive lowering and conversion type changes.
2711// The Ops are typically not used directly by higher level dialects, but are
2712// used by intra-dialect rewriting rules to bring vector operations closer
2713// to the hardware ISA.
2714//===----------------------------------------------------------------------===//
2715
2716/// Vector dialect matrix multiplication op that operates on flattened 1-D
2717/// MLIR vectors. This is the counterpart of llvm.matrix.multiply in MLIR.
2718/// This may seem redundant with vector.contract but it serves the purposes of
2719/// more progressive lowering and localized type conversion on the path:
2720///   `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`.
def Vector_MatmulOp : Vector_Op<"matrix_multiply", [Pure,
        PredOpTrait<"lhs operand and result have same element type",
                    TCresVTEtIsSameAsOpBase<0, 0>>,
        PredOpTrait<"rhs operand and result have same element type",
                    TCresVTEtIsSameAsOpBase<0, 1>>]>,
      Arguments<(
        // TODO: tighten vector element types that make sense.
        ins FixedVectorOfRankAndType<[1],
              [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$lhs,
            FixedVectorOfRankAndType<[1],
              [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$rhs,
            I32Attr:$lhs_rows, I32Attr:$lhs_columns, I32Attr:$rhs_columns)>,
      Results<(
        outs FixedVectorOfRankAndType<[1],
               [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$res)>
{
  let summary = "Vector matrix multiplication op that operates on flattened 1-D"
    " MLIR vectors";
  let description = [{
    This is the counterpart of llvm.matrix.multiply in MLIR. It serves the
    purposes of more progressive lowering and localized type conversion.
    Higher levels typically lower matrix multiplications into 'vector.contract'
    operations. Subsequent rewriting rules progressively lower these operations
    into 'vector.matrix_multiply' operations to bring the operations closer
    to the hardware ISA.

    The `vector.matrix_multiply` op treats `lhs` as matrix with <lhs_rows> rows
    and <lhs_columns> columns, `rhs` as matrix with <lhs_columns> rows and
    <rhs_columns> columns and multiplies them. The result matrix is returned
    embedded in the result vector.

    Note, the corresponding LLVM intrinsic, `@llvm.matrix.multiply.*`, does not
    support scalable vectors. Hence, this Op is only available for fixed-width
    vectors. Also see:

    http://llvm.org/docs/LangRef.html#llvm-matrix-multiply-intrinsic

    Example:

    ```mlir
    %C = vector.matrix_multiply %A, %B
      { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32 } :
      (vector<64xf64>, vector<48xf64>) -> vector<12xf64>
    ```
  }];
  let builders = [
   // Build from the two flattened operands and the matrix dimensions; the
   // result type is derived as a 1-D vector of lhsRows * rhsColumns elements.
   OpBuilder<(ins "Value":$lhs, "Value":$rhs, "unsigned":$lhsRows,
     "unsigned":$lhsColumns, "unsigned":$rhsColumns),
   [{
     $_state.addOperands({lhs, rhs});
     $_state.addAttribute("lhs_rows",$_builder.getI32IntegerAttr(lhsRows));
     $_state.addAttribute("lhs_columns",$_builder.getI32IntegerAttr(lhsColumns));
     $_state.addAttribute("rhs_columns",$_builder.getI32IntegerAttr(rhsColumns));
     $_state.addTypes(VectorType::get(lhsRows * rhsColumns,
       ::llvm::cast<VectorType>(lhs.getType()).getElementType()));
   }]>,
  ];
  let assemblyFormat = "$lhs `,` $rhs attr-dict "
    "`:` `(` type($lhs) `,` type($rhs) `)` `->` type($res)";
}
2781
2782/// Vector dialect matrix transposition op that operates on flattened 1-D
2783/// MLIR vectors. This is the counterpart of llvm.matrix.transpose in MLIR.
2784/// This may seem redundant with vector.transpose but it serves the purposes of
2785/// more progressive lowering and localized type conversion on the path:
2786///   `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`.
def Vector_FlatTransposeOp : Vector_Op<"flat_transpose", [Pure,
  PredOpTrait<"source operand and result have same element type",
                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
    Arguments<(
      // TODO: tighten vector element types that make sense.
      ins FixedVectorOfRankAndType<[1],
            [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$matrix,
          I32Attr:$rows, I32Attr:$columns)>,
    Results<(
      outs FixedVectorOfRankAndType<[1],
             [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$res)> {
  let summary = "Vector matrix transposition on flattened 1-D MLIR vectors";
  let description = [{
    This is the counterpart of llvm.matrix.transpose in MLIR. It serves
    the purposes of more progressive lowering and localized type conversion.
    Higher levels typically lower matrix transpositions into 'vector.transpose'
    operations. Subsequent rewriting rules progressively lower these operations
    into 'vector.flat_transpose' operations to bring the operations closer
    to the hardware ISA.

    The `vector.flat_transpose` op treats the 1-D input `matrix` as
    a 2-D matrix with <rows> rows and <columns> columns, and returns the
    transposed matrix in flattened form in 'res'.

    Note, the corresponding LLVM intrinsic, `@llvm.matrix.transpose.*`, does not
    support scalable vectors. Hence, this Op is only available for fixed-width
    vectors. Also see:

    http://llvm.org/docs/LangRef.html#llvm-matrix-transpose-intrinsic

    Example:

    ```mlir
    %1 = vector.flat_transpose %0 {columns = 4 : i32, rows = 4 : i32}
       : vector<16xf32> -> vector<16xf32>
    ```
  }];
  let assemblyFormat = "$matrix attr-dict `:` type($matrix) `->` type($res)";
}
2826
2827//===----------------------------------------------------------------------===//
2828// SplatOp
2829//===----------------------------------------------------------------------===//
2830
def Vector_SplatOp : Vector_Op<"splat", [
    Pure,
    DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
    TypesMatchWith<"operand type matches element type of result",
                   "aggregate", "input",
                   "::llvm::cast<VectorType>($_self).getElementType()">
  ]> {
  let summary = "vector splat or broadcast operation";
  let description = [{
    Broadcast the operand to all elements of the result vector. The operand is
    required to be of integer/index/float type.

    Example:

    ```mlir
    %s = arith.constant 10.1 : f32
    %t = vector.splat %s : vector<8x16xf32>
    ```
  }];

  let arguments = (ins AnyTypeOf<[AnySignlessInteger, Index, AnyFloat],
                                 "integer/index/float type">:$input);
  let results = (outs AnyVectorOfAnyRank:$aggregate);

  let builders = [
    // Build a splat of `element` into the vector type `aggregateType`.
    OpBuilder<(ins "Value":$element, "Type":$aggregateType),
    [{ build($_builder, $_state, aggregateType, element); }]>];
  let assemblyFormat = "$input attr-dict `:` type($aggregate)";

  let hasFolder = 1;
}
2862
2863//===----------------------------------------------------------------------===//
2864// VectorScaleOp
2865//===----------------------------------------------------------------------===//
2866
2867// TODO: In the future, we might want to have scalable vectors with different
2868//       scales for different dimensions. E.g.: vector<[16]x[16]xf32>, in
2869//       which case we might need to add an index to 'vscale' to select one
2870//       of them. In order to support GPUs, we might also want to differentiate
2871//       between a 'global' scale, a scale that's fixed throughout the
2872//       execution, and a 'local' scale that is fixed but might vary with each
2873//       call to the function. For that, it might be useful to have a
2874//       'vector.scale.global' and a 'vector.scale.local' operation.
def VectorScaleOp : Vector_Op<"vscale",
  [Pure, DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]
> {
  let summary = "Load vector scale size";
  let description = [{
    The `vscale` op returns the scale of the scalable vectors, a positive
    integer value that is constant at runtime but unknown at compile-time.
    The scale of the vector indicates the multiplicity of the vectors and
    vector operations. For example, a `vector<[4]xi32>` is equivalent to
    `vscale` consecutive `vector<4xi32>`; and an operation on a
    `vector<[4]xi32>` is equivalent to performing that operation `vscale`
    times, once on each `<4xi32>` segment of the scalable vector. The `vscale`
    op can be used to calculate the step in vector-length agnostic (VLA) loops.
    Right now we only support one contiguous set of scalable dimensions, all of
    them grouped and scaled with the value returned by 'vscale'.
  }];
  let results = (outs Index:$res);
  let assemblyFormat = "attr-dict";

  let extraClassDefinition = [{
    // Name the result "%vscale" in printed IR for readability.
    void $cppClass::getAsmResultNames(
        ::llvm::function_ref<void(mlir::Value, mlir::StringRef)> setNameFn) {
      setNameFn(getResult(), "vscale");
    }
  }];
}
2901
2902//===----------------------------------------------------------------------===//
2903// VectorScanOp
2904//===----------------------------------------------------------------------===//
2905
def Vector_ScanOp :
  Vector_Op<"scan", [Pure,
    AllTypesMatch<["source", "dest"]>,
    AllTypesMatch<["initial_value", "accumulated_value"]> ]>,
    Arguments<(ins Vector_CombiningKindAttr:$kind,
                   AnyVectorOfNonZeroRank:$source,
                   AnyVectorOfAnyRank:$initial_value,
                   I64Attr:$reduction_dim,
                   BoolAttr:$inclusive)>,
    Results<(outs AnyVectorOfNonZeroRank:$dest,
                  AnyVectorOfAnyRank:$accumulated_value)> {
  let summary = "Scan operation";
  let description = [{
    Performs an inclusive/exclusive scan on an n-D vector along a single
    dimension returning an n-D result vector using the given
    operation (`add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui`/`and`/`or`/`xor` for
    integers, and `add`/`mul`/`minnumf`/`maxnumf`/`minimumf`/`maximumf` for
    floats), and a specified value for the initial value. The operator returns
    the result of scan as well as the result of the last reduction in the scan.

    Example:

    ```mlir
    %1:2 = vector.scan <add>, %0, %acc {inclusive = false, reduction_dim = 1 : i64} :
      vector<4x8x16x32xf32>, vector<4x16x32xf32>
    ```
  }];
  let builders = [
    // Build a scan of `source` combined with `kind`, starting from
    // `initial_value`; defaults to an inclusive scan along dimension 0.
    OpBuilder<(ins "Value":$source, "Value":$initial_value,
                   "CombiningKind":$kind,
                   CArg<"int64_t", "0">:$reduction_dim,
                   CArg<"bool", "true">:$inclusive)>
  ];
  let extraClassDeclaration = [{
    /// Return the type of the scanned vector ($source).
    VectorType getSourceType() {
      return ::llvm::cast<VectorType>(getSource().getType());
    }
    /// Return the type of the scan result ($dest).
    VectorType getDestType() {
      return ::llvm::cast<VectorType>(getDest().getType());
    }
    /// Return the type of the final reduction value ($accumulated_value).
    VectorType getAccumulatorType() {
      return ::llvm::cast<VectorType>(getAccumulatedValue().getType());
    }
    /// Return the type of the initial value ($initial_value).
    VectorType getInitialValueType() {
      return ::llvm::cast<VectorType>(getInitialValue().getType());
    }
  }];
  let assemblyFormat =
    "$kind `,` $source `,` $initial_value attr-dict `:` "
    "type($source) `,` type($initial_value) ";
  let hasVerifier = 1;
}
2958
2959//===----------------------------------------------------------------------===//
2960// VectorStepOp
2961//===----------------------------------------------------------------------===//
2962
def Vector_StepOp : Vector_Op<"step", [Pure]> {
  let summary = "A linear sequence of values from 0 to N";
  let description = [{
    A `step` operation produces an index vector, i.e. a 1-D vector of values of
    index type that represents a linear sequence from 0 to N-1, where N is the
    number of elements in the `result` vector.

    Supports fixed-width and scalable vectors.

    Examples:

    ```mlir
    %0 = vector.step : vector<4xindex> ; [0, 1, 2, 3]
    %1 = vector.step : vector<[4]xindex> ; [0, 1, .., <vscale * 4 - 1>]
    ```
  }];
  // The result is constrained to a rank-1 vector of `index` elements; no
  // other element types or ranks are accepted.
  let results = (outs VectorOfRankAndType<[1], [Index]>:$result);
  let assemblyFormat = "attr-dict `:` type($result)";
}
2982
//===----------------------------------------------------------------------===//
// VectorYieldOp
//===----------------------------------------------------------------------===//

def Vector_YieldOp : Vector_Op<"yield", [
    Pure, ReturnLike, Terminator]> {
  let summary = "Terminates and yields values from vector regions.";
  let description = [{
    "vector.yield" yields an SSA value from the Vector dialect op region and
    terminates the region. The semantics of how the values are yielded is
    defined by the parent operation.
    If "vector.yield" has any operands, the operands must correspond to the
    parent operation's results.
    If the parent operation defines no values, the vector.yield may be omitted
    when printing the region.
  }];

  let arguments = (ins Variadic<AnyType>:$operands);

  // Builder for a yield with no operands (the common implicit-terminator
  // case); the default operand list is empty so no body is needed.
  let builders = [
    OpBuilder<(ins), [{ /* nothing to do */ }]>,
  ];

  // Operands and their types are printed only when present.
  let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?";
}
3004
3005
3006#endif // MLIR_DIALECT_VECTOR_IR_VECTOR_OPS
3007