//===- VectorOps.td - Vector op definitions ---------------*- tablegen -*-====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines MLIR vector operations.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_VECTOR_IR_VECTOR_OPS
#define MLIR_DIALECT_VECTOR_IR_VECTOR_OPS

include "mlir/Dialect/Arith/IR/ArithBase.td"
include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.td"
include "mlir/Dialect/Vector/Interfaces/MaskingOpInterface.td"
include "mlir/Dialect/Vector/IR/Vector.td"
include "mlir/Dialect/Vector/IR/VectorAttributes.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/DestinationStyleOpInterface.td"
include "mlir/Interfaces/InferIntRangeInterface.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/VectorInterfaces.td"
include "mlir/Interfaces/ViewLikeInterface.td"
include "mlir/IR/BuiltinAttributes.td"
include "mlir/IR/EnumAttr.td"

// TODO: Add an attribute to specify a different algebra with operators other
// than the current set: {*, +}.
34def Vector_ContractionOp : 35 Vector_Op<"contract", [ 36 Pure, 37 PredOpTrait<"lhs and rhs have same element type", TCopVTEtIsSameAs<0, 1>>, 38 PredOpTrait<"third operand acc and result have same element type", 39 TCresVTEtIsSameAsOpBase<0, 2>>, 40 DeclareOpInterfaceMethods<MaskableOpInterface>, 41 DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]> 42 ]>, 43 Arguments<(ins AnyVectorOfNonZeroRank:$lhs, AnyVectorOfNonZeroRank:$rhs, AnyType:$acc, 44 ArrayAttr:$indexing_maps, 45 Vector_IteratorTypeArrayAttr:$iterator_types, 46 DefaultValuedAttr<Vector_CombiningKindAttr, 47 "CombiningKind::ADD">:$kind)>, 48 Results<(outs AnyType)> { 49 let summary = "vector contraction operation"; 50 let description = [{ 51 Computes the sum of products of vector elements along contracting 52 dimension pairs from 2 vectors of rank M and N respectively, adds this 53 intermediate result to the accumulator argument of rank K, and returns a 54 vector result of rank K (where K = num_lhs_free_dims + num_rhs_free_dims + 55 num_batch_dims (see dimension type descriptions below)). For K = 0 (no 56 free or batch dimensions), the accumulator and output are a scalar. 57 58 If operands and the result have types of different bitwidths, operands are 59 promoted to have the same bitwidth as the result before performing the 60 contraction. For integer types, only signless integer types are supported, 61 and the promotion happens via sign extension. 62 63 An iterator type attribute list must be specified, where each element of 64 the list represents an iterator with one of the following types: 65 66 * "reduction": reduction dimensions are present in the lhs and rhs 67 arguments but not in the output (and accumulator 68 argument). These are the dimensions along which the vector 69 contraction op computes the sum of products, and 70 contracting dimension pair dimension sizes must match 71 between lhs/rhs. 
72 73 * "parallel": Batch dimensions are iterator type "parallel", and 74 are non-contracting dimensions present in the lhs, rhs and 75 output. The lhs/rhs co-iterate along the batch dimensions, 76 which should be expressed in their indexing maps. 77 78 Free dimensions are iterator type "parallel", and are 79 non-contraction, non-batch dimensions accessed by either the 80 lhs or rhs (but not both). The lhs and rhs free dimensions 81 are unrelated to each other and do not co-iterate, which 82 should be expressed in their indexing maps. 83 84 An indexing map attribute list must be specified with an entry for lhs, rhs 85 and acc arguments. An indexing map attribute specifies a mapping from each 86 iterator in the iterator type list, to each dimension of an N-D vector. 87 88 An optional kind attribute may be used to specify the combining function 89 between the intermediate result and accumulator argument of rank K. This 90 attribute can take the values `add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui` 91 /`and`/`or`/`xor` for integers, and `add`/`mul`/`minnumf`/`maxnumf` 92 /`minimumf`/`maximumf` for floats. The default is `add`. 93 94 Example: 95 96 ```mlir 97 // Simple DOT product (K = 0). 98 #contraction_accesses = [ 99 affine_map<(i) -> (i)>, 100 affine_map<(i) -> (i)>, 101 affine_map<(i) -> ()> 102 ] 103 #contraction_trait = { 104 indexing_maps = #contraction_accesses, 105 iterator_types = ["reduction"] 106 } 107 %3 = vector.contract #contraction_trait %0, %1, %2 108 : vector<10xf32>, vector<10xf32> into f32 109 110 // 2D vector contraction with one contracting dimension (matmul, K = 2). 
    #contraction_accesses = [
      affine_map<(i, j, k) -> (i, k)>,
      affine_map<(i, j, k) -> (k, j)>,
      affine_map<(i, j, k) -> (i, j)>
    ]
    #contraction_trait = {
      indexing_maps = #contraction_accesses,
      iterator_types = ["parallel", "parallel", "reduction"]
    }

    %3 = vector.contract #contraction_trait %0, %1, %2
      : vector<4x3xf32>, vector<3x7xf32> into vector<4x7xf32>

    // 4D to 3D vector contraction with two contracting dimensions and
    // one batch dimension (K = 3).
    #contraction_accesses = [
      affine_map<(b0, f0, f1, c0, c1) -> (c0, b0, c1, f0)>,
      affine_map<(b0, f0, f1, c0, c1) -> (b0, c1, c0, f1)>,
      affine_map<(b0, f0, f1, c0, c1) -> (b0, f0, f1)>
    ]
    #contraction_trait = {
      indexing_maps = #contraction_accesses,
      iterator_types = ["parallel", "parallel", "parallel",
                        "reduction", "reduction"]
    }

    %4 = vector.contract #contraction_trait %0, %1, %2
      : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32>

    // Vector contraction with mixed types. lhs/rhs have different element
    // types than accumulator/result.
    %5 = vector.contract #contraction_trait %0, %1, %2
      : vector<10xf16>, vector<10xf16> into f32

    // Contract with max (K = 0).
146 #contraction_accesses = [ 147 affine_map<(i) -> (i)>, 148 affine_map<(i) -> (i)>, 149 affine_map<(i) -> ()> 150 ] 151 #contraction_trait = { 152 indexing_maps = #contraction_accesses, 153 iterator_types = ["reduction"], 154 kind = #vector.kind<maxnumf> 155 } 156 %6 = vector.contract #contraction_trait %0, %1, %2 157 : vector<10xf32>, vector<10xf32> into f32 158 ``` 159 }]; 160 let builders = [ 161 OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc, 162 "ArrayAttr":$indexingMaps, "ArrayAttr":$iteratorTypes)>, 163 OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc, 164 "ArrayRef<ArrayRef<AffineExpr>>":$indexingExprs, 165 "ArrayRef<IteratorType>":$iteratorTypes)>, 166 OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc, 167 "ArrayAttr":$indexingMaps, "ArrayAttr":$iteratorTypes, 168 "CombiningKind":$kind)> 169 ]; 170 let extraClassDeclaration = [{ 171 VectorType getLhsType() { 172 return ::llvm::cast<VectorType>(getLhs().getType()); 173 } 174 VectorType getRhsType() { 175 return ::llvm::cast<VectorType>(getRhs().getType()); 176 } 177 Type getAccType() { return getAcc().getType(); } 178 Type getResultType() { return getResult().getType(); } 179 SmallVector<StringRef> getTraitAttrNames(); 180 static unsigned getAccOperandIndex() { return 2; } 181 182 llvm::SmallVector<::mlir::AffineMap, 4> getIndexingMapsArray() { 183 return llvm::to_vector<4>(getIndexingMaps().getAsValueRange<::mlir::AffineMapAttr>()); 184 } 185 186 // Returns the bounds of each dimension in the iteration space spanned 187 // by the iterator types of this operation. 188 void getIterationBounds(SmallVectorImpl<int64_t> &iterationBounds); 189 190 // Returns a list of index maps, where there is a list entry for each 191 // op indexing map attribute (i.e. one for each input and output, with 192 // the output listed last). Each index map, maps from this operations 193 // iteration space, to vector dimensions of the maps input/output. 
    void getIterationIndexMap(
      std::vector<DenseMap<int64_t, int64_t>> &iterationIndexMap);

    std::vector<std::pair<int64_t, int64_t>> getContractingDimMap();
    std::vector<std::pair<int64_t, int64_t>> getBatchDimMap();

    static CombiningKind getDefaultKind() {
      return CombiningKind::ADD;
    }

    SmallVector<IteratorType> getIteratorTypesArray() {
      auto range =
          getIteratorTypes()
              .template getAsValueRange<IteratorTypeAttr, IteratorType>();
      return {range.begin(), range.end()};
    }
  }];

  let hasCanonicalizer = 1;
  let hasCustomAssemblyFormat = 1;
  let hasVerifier = 1;
}

def Vector_ReductionOp :
  Vector_Op<"reduction", [Pure,
    PredOpTrait<"source operand and result have same element type",
                TCresVTEtIsSameAsOpBase<0, 0>>,
    OptionalTypesMatchWith<"dest and acc have the same type",
                           "dest", "acc", "::llvm::cast<Type>($_self)">,
    DeclareOpInterfaceMethods<ArithFastMathInterface>,
    DeclareOpInterfaceMethods<MaskableOpInterface>,
    DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
  ]>,
  Arguments<(ins Vector_CombiningKindAttr:$kind,
                 AnyVectorOfAnyRank:$vector,
                 Optional<AnyType>:$acc,
                 DefaultValuedAttr<
                   Arith_FastMathAttr,
                   "::mlir::arith::FastMathFlags::none">:$fastmath)>,
  Results<(outs AnyType:$dest)> {
  let summary = "reduction operation";
  let description = [{
    Reduces a 1-D vector "horizontally" into a scalar using the given
    operation: `add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui`/`and`/`or`/`xor` for
    integers, and `add`/`mul`/`minnumf`/`maxnumf`/`minimumf`/`maximumf` for
    floats. Reductions also allow an optional fused accumulator.
240 241 Note that these operations are restricted to 1-D vectors to remain 242 close to the corresponding LLVM intrinsics: 243 244 http://llvm.org/docs/LangRef.html#vector-reduction-intrinsics 245 246 Example: 247 248 ```mlir 249 %1 = vector.reduction <add>, %0 : vector<16xf32> into f32 250 251 %3 = vector.reduction <xor>, %2 : vector<4xi32> into i32 252 253 %4 = vector.reduction <mul>, %0, %1 : vector<16xf32> into f32 254 ``` 255 }]; 256 let extraClassDeclaration = [{ 257 VectorType getSourceVectorType() { 258 return ::llvm::cast<VectorType>(getVector().getType()); 259 } 260 }]; 261 let builders = [ 262 // Builder that infers the type of `dest`. 263 OpBuilder<(ins "CombiningKind":$kind, "Value":$vector, "Value":$acc, 264 CArg<"::mlir::arith::FastMathFlags", 265 "::mlir::arith::FastMathFlags::none">:$fastMathFlags)>, 266 // Builder that infers the type of `dest` and has no accumulator. 267 OpBuilder<(ins "CombiningKind":$kind, "Value":$vector, 268 CArg<"::mlir::arith::FastMathFlags", 269 "::mlir::arith::FastMathFlags::none">:$fastMathFlags)> 270 ]; 271 272 let assemblyFormat = "$kind `,` $vector (`,` $acc^)? (`fastmath` `` $fastmath^)?" 
273 " attr-dict `:` type($vector) `into` type($dest)"; 274 let hasCanonicalizer = 1; 275 let hasVerifier = 1; 276} 277 278def Vector_MultiDimReductionOp : 279 Vector_Op<"multi_reduction", [Pure, 280 AllTypesMatch<["dest", "acc"]>, 281 PredOpTrait<"source operand and result have same element type", 282 TCresVTEtIsSameAsOpBase<0, 0>>, 283 DeclareOpInterfaceMethods<InferTypeOpInterface>, 284 DeclareOpInterfaceMethods<MaskableOpInterface>, 285 DeclareOpInterfaceMethods<VectorUnrollOpInterface, 286 ["getShapeForUnroll"]>]>, 287 Arguments<(ins Vector_CombiningKindAttr:$kind, 288 AnyVectorOfNonZeroRank:$source, 289 AnyType:$acc, 290 DenseI64ArrayAttr:$reduction_dims)>, 291 Results<(outs AnyType:$dest)> { 292 let summary = "Multi-dimensional reduction operation"; 293 let description = [{ 294 Reduces an n-D vector into an (n-k)-D vector (or a scalar when k == n) 295 using the given operation: `add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui` 296 /`and`/`or`/`xor` for integers, and `add`/`mul`/`minnumf`/`maxnumf`/`minimumf` 297 /`maximumf` for floats. 298 Takes an initial accumulator operand. 
299 300 Example: 301 302 ```mlir 303 %1 = vector.multi_reduction <add>, %0, %acc0 [1, 3] : 304 vector<4x8x16x32xf32> to vector<4x16xf32> 305 %2 = vector.multi_reduction <add>, %1, %acc1 [0, 1] : 306 vector<4x16xf32> to f32 307 ``` 308 }]; 309 let builders = [ 310 OpBuilder<(ins "Value":$source, "Value":$acc, 311 "ArrayRef<bool>":$reductionMask, "CombiningKind":$kind)> 312 ]; 313 let extraClassDeclaration = [{ 314 VectorType getSourceVectorType() { 315 return ::llvm::cast<VectorType>(getSource().getType()); 316 } 317 Type getDestType() { 318 return getDest().getType(); 319 } 320 321 bool isReducedDim(int64_t d) { 322 assert(d >= 0 && d < static_cast<int64_t>(getReductionMask().size()) && 323 "d overflows the number of dims"); 324 return getReductionMask()[d]; 325 } 326 327 SmallVector<bool> getReductionMask() { 328 SmallVector<bool> res(getSourceVectorType().getRank(), false); 329 for (int64_t dim : getReductionDims()) 330 res[dim] = true; 331 return res; 332 } 333 static SmallVector<bool> getReductionMask( 334 ArrayRef<int64_t> reductionDims, unsigned sourceRank) { 335 SmallVector<bool> res(sourceRank, false); 336 for (auto idx : reductionDims) 337 res[idx] = true; 338 return res; 339 } 340 }]; 341 let assemblyFormat = 342 "$kind `,` $source `,` $acc attr-dict $reduction_dims `:` type($source) `to` type($dest)"; 343 let hasFolder = 1; 344 let hasCanonicalizer = 1; 345 let hasVerifier = 1; 346} 347 348def Vector_BroadcastOp : 349 Vector_Op<"broadcast", [Pure, 350 DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>, 351 PredOpTrait<"source operand and result have same element type", 352 TCresVTEtIsSameAsOpBase<0, 0>>]>, 353 Arguments<(ins AnyType:$source)>, 354 Results<(outs AnyVectorOfAnyRank:$vector)> { 355 let summary = "broadcast operation"; 356 let description = [{ 357 Broadcasts the scalar or k-D vector value in the source operand 358 to a n-D result vector such that the broadcast makes sense, i.e., 359 the source operand is duplicated to 
match the given rank and sizes 360 in the result vector. The legality rules are: 361 * the source operand must have the same element type as the result type 362 * a k-D vector <s_1 x .. x s_k x type> can be broadcast to 363 a n-D vector <t_1 x .. x t_n x type> if 364 * k <= n, and 365 * the sizes in the trailing dimensions n-k < i <= n with j=i+k-n 366 match exactly as s_j = t_i or s_j = 1: 367 ``` 368 t_1 x .. t_n-k x t_n-k+1 x .. x t_i x .. x t_n 369 s_1 x .. x s_j x .. x s_k 370 <duplication> <potential stretch> 371 ``` 372 * in addition, any scalable unit dimension, `[1]`, must match exactly. 373 374 The source operand is duplicated over all the missing leading dimensions 375 and stretched over the trailing dimensions where the source has a non-equal 376 dimension of 1. These rules imply that any scalar broadcast (k=0) to any 377 shaped vector with the same element type is always legal. 378 379 Example: 380 381 ```mlir 382 %0 = arith.constant 0.0 : f32 383 %1 = vector.broadcast %0 : f32 to vector<16xf32> 384 %2 = vector.broadcast %1 : vector<16xf32> to vector<4x16xf32> 385 ``` 386 }]; 387 let extraClassDeclaration = [{ 388 Type getSourceType() { return getSource().getType(); } 389 VectorType getResultVectorType() { 390 return ::llvm::cast<VectorType>(getVector().getType()); 391 } 392 393 /// Return the dimensions of the result vector that were formerly ones in the 394 /// source tensor and thus correspond to "dim-1" broadcasting. 395 llvm::SetVector<int64_t> computeBroadcastedUnitDims(); 396 397 /// Broadcast `value` to a vector of `dstShape`, knowing that exactly the 398 /// `broadcastedDims` dimensions in the dstShape are broadcasted. 399 /// This requires (and asserts) that the broadcast is free of dim-1 400 /// broadcasting. 401 /// Since vector.broadcast only allows expanding leading dimensions, an extra 402 /// vector.transpose may be inserted to make the broadcast possible. 
    /// `value`, `dstShape` and `broadcastedDims` must be properly specified or
    /// the helper will assert. This means:
    ///   1. `dstShape` must not be empty.
    ///   2. `broadcastedDims` must be confined to [0 .. rank(value.getResultVectorType)]
    ///   3. `dstShape` trimmed of the dimensions specified in `broadcastedDims`
    ///      must match the `value` shape.
    static Value createOrFoldBroadcastOp(
        OpBuilder &b, Value value,
        ArrayRef<int64_t> dstShape,
        const llvm::SetVector<int64_t> &broadcastedDims);
  }];
  let assemblyFormat = "$source attr-dict `:` type($source) `to` type($vector)";
  let hasFolder = 1;
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
}

def Vector_ShuffleOp
    : Vector_Op<
          "shuffle",
          [Pure,
           PredOpTrait<"first operand v1 and result have same element type",
                       TCresVTEtIsSameAsOpBase<0, 0>>,
           PredOpTrait<"second operand v2 and result have same element type",
                       TCresVTEtIsSameAsOpBase<0, 1>>,
           InferTypeOpAdaptor]>,
      Arguments<(ins AnyFixedVectorOfAnyRank:$v1, AnyFixedVectorOfAnyRank:$v2,
                     DenseI64ArrayAttr:$mask)>,
      Results<(outs AnyVectorOfNonZeroRank:$vector)> {
  let summary = "shuffle operation";
  let description = [{
    The shuffle operation constructs a permutation (or duplication) of elements
    from two input vectors, returning a vector with the same element type as
    the input and a length that is the same as the shuffle mask. The two input
    vectors must have the same element type, same rank, and trailing dimension
    sizes and shuffles their values in the leading dimension (which may differ
    in size) according to the given mask. The legality rules are:
    * the two operands must have the same element type as the result
      - Either, the two operands and the result must have the same
        rank and trailing dimension sizes, viz. given two k-D operands
                v1 : <s_1 x s_2 x .. x s_k x type> and
                v2 : <t_1 x t_2 x ..
x t_k x type> 445 we have s_i = t_i for all 1 < i <= k 446 - Or, the two operands must be 0-D vectors and the result is a 1-D vector. 447 * the mask length equals the leading dimension size of the result 448 * numbering the input vector indices left to right across the operands, all 449 mask values must be within range, viz. given two k-D operands v1 and v2 450 above, all mask values are in the range [0,s_1+t_1). The value `-1` 451 represents a poison mask value, which specifies that the selected element 452 is poison. 453 454 Note, scalable vectors are not supported. 455 456 Example: 457 458 ```mlir 459 %0 = vector.shuffle %a, %b[0, 3] 460 : vector<2xf32>, vector<2xf32> ; yields vector<2xf32> 461 %1 = vector.shuffle %c, %b[0, 1, 2] 462 : vector<2x16xf32>, vector<1x16xf32> ; yields vector<3x16xf32> 463 %2 = vector.shuffle %a, %b[3, 2, 1, 0] 464 : vector<2xf32>, vector<2xf32> ; yields vector<4xf32> 465 %3 = vector.shuffle %a, %b[0, 1] 466 : vector<f32>, vector<f32> ; yields vector<2xf32> 467 %4 = vector.shuffle %a, %b[0, 4, -1, -1, -1, -1] 468 : vector<4xf32>, vector<4xf32> ; yields vector<6xf32> 469 ``` 470 }]; 471 472 let extraClassDeclaration = extraPoisonClassDeclaration # [{ 473 VectorType getV1VectorType() { 474 return ::llvm::cast<VectorType>(getV1().getType()); 475 } 476 VectorType getV2VectorType() { 477 return ::llvm::cast<VectorType>(getV2().getType()); 478 } 479 VectorType getResultVectorType() { 480 return ::llvm::cast<VectorType>(getVector().getType()); 481 } 482 }]; 483 484 let assemblyFormat = "operands $mask attr-dict `:` type(operands)"; 485 486 let hasFolder = 1; 487 let hasVerifier = 1; 488 let hasCanonicalizer = 1; 489} 490 491def ResultIsDoubleSourceVectorType : TypesMatchWith< 492 "type of 'result' is double the width of the inputs", 493 "lhs", "result", 494 [{ 495 [&]() -> ::mlir::VectorType { 496 auto vectorType = ::llvm::cast<::mlir::VectorType>($_self); 497 ::mlir::VectorType::Builder builder(vectorType); 498 if (vectorType.getRank() == 0) 
{ 499 static constexpr int64_t v2xTyShape[] = {2}; 500 return builder.setShape(v2xTyShape); 501 } 502 auto lastDim = vectorType.getRank() - 1; 503 return builder.setDim(lastDim, vectorType.getDimSize(lastDim) * 2); 504 }() 505 }]>; 506 507def Vector_InterleaveOp : 508 Vector_Op<"interleave", [Pure, AllTypesMatch<["lhs", "rhs"]>, 509 ResultIsDoubleSourceVectorType]> { 510 let summary = "constructs a vector by interleaving two input vectors"; 511 let description = [{ 512 The interleave operation constructs a new vector by interleaving the 513 elements from the trailing (or final) dimension of two input vectors, 514 returning a new vector where the trailing dimension is twice the size. 515 516 Note that for the n-D case this differs from the interleaving possible with 517 `vector.shuffle`, which would only operate on the leading dimension. 518 519 Another key difference is this operation supports scalable vectors, though 520 currently a general LLVM lowering is limited to the case where only the 521 trailing dimension is scalable. 522 523 Example: 524 ```mlir 525 %a = arith.constant dense<[0, 1]> : vector<2xi32> 526 %b = arith.constant dense<[2, 3]> : vector<2xi32> 527 // The value of `%0` is `[0, 2, 1, 3]`. 528 %0 = vector.interleave %a, %b : vector<2xi32> -> vector<4xi32> 529 530 // Examples showing allowed input and result types. 
531 %1 = vector.interleave %c, %d : vector<f16> -> vector<2xf16> 532 %2 = vector.interleave %e, %f : vector<6x3xf32> -> vector<6x6xf32> 533 %3 = vector.interleave %g, %h : vector<[4]xi32> -> vector<[8]xi32> 534 %4 = vector.interleave %i, %j : vector<2x4x[2]xf64> -> vector<2x4x[4]xf64> 535 ``` 536 }]; 537 538 let arguments = (ins AnyVectorOfAnyRank:$lhs, AnyVectorOfAnyRank:$rhs); 539 let results = (outs AnyVectorOfNonZeroRank:$result); 540 541 let assemblyFormat = [{ 542 $lhs `,` $rhs attr-dict `:` type($lhs) `->` type($result) 543 }]; 544 545 let extraClassDeclaration = [{ 546 VectorType getSourceVectorType() { 547 return ::llvm::cast<VectorType>(getLhs().getType()); 548 } 549 VectorType getResultVectorType() { 550 return ::llvm::cast<VectorType>(getResult().getType()); 551 } 552 }]; 553} 554 555class ResultIsHalfSourceVectorType<string result> : TypesMatchWith< 556 "the trailing dimension of the results is half the width of source trailing dimension", 557 "source", result, 558 [{ 559 [&]() -> ::mlir::VectorType { 560 auto vectorType = ::llvm::cast<mlir::VectorType>($_self); 561 ::mlir::VectorType::Builder builder(vectorType); 562 auto lastDim = vectorType.getRank() - 1; 563 auto newDimSize = vectorType.getDimSize(lastDim) / 2;; 564 if (newDimSize <= 0) 565 return vectorType; // (invalid input type) 566 return builder.setDim(lastDim, newDimSize); 567 }() 568 }] 569>; 570 571def SourceVectorEvenElementCount : PredOpTrait< 572 "the trailing dimension of the source vector has an even number of elements", 573 CPred<[{ 574 [&](){ 575 auto srcVec = getSourceVectorType(); 576 return srcVec.getDimSize(srcVec.getRank() - 1) % 2 == 0; 577 }() 578 }]> 579>; 580 581def Vector_DeinterleaveOp : 582 Vector_Op<"deinterleave", [Pure, 583 SourceVectorEvenElementCount, 584 ResultIsHalfSourceVectorType<"res1">, 585 AllTypesMatch<["res1", "res2"]> 586 ]> { 587 let summary = "constructs two vectors by deinterleaving an input vector"; 588 let description = [{ 589 The deinterleave 
operation constructs two vectors from a single input 590 vector. The first result vector contains the elements from even indexes 591 of the input, and the second contains elements from odd indexes. This is 592 the inverse of a `vector.interleave` operation. 593 594 Each output's trailing dimension is half of the size of the input 595 vector's trailing dimension. This operation requires the input vector 596 to have a rank > 0 and an even number of elements in its trailing 597 dimension. 598 599 The operation supports scalable vectors. 600 601 Example: 602 ```mlir 603 %0, %1 = vector.deinterleave %a 604 : vector<8xi8> -> vector<4xi8> 605 %2, %3 = vector.deinterleave %b 606 : vector<2x8xi8> -> vector<2x4xi8> 607 %4, %5 = vector.deinterleave %c 608 : vector<2x8x4xi8> -> vector<2x8x2xi8> 609 %6, %7 = vector.deinterleave %d 610 : vector<[8]xf32> -> vector<[4]xf32> 611 %8, %9 = vector.deinterleave %e 612 : vector<2x[6]xf64> -> vector<2x[3]xf64> 613 %10, %11 = vector.deinterleave %f 614 : vector<2x4x[6]xf64> -> vector<2x4x[3]xf64> 615 ``` 616 }]; 617 618 let arguments = (ins AnyVectorOfNonZeroRank:$source); 619 let results = (outs AnyVectorOfNonZeroRank:$res1, AnyVectorOfNonZeroRank:$res2); 620 621 let assemblyFormat = [{ 622 $source attr-dict `:` type($source) `->` type($res1) 623 }]; 624 625 let extraClassDeclaration = [{ 626 VectorType getSourceVectorType() { 627 return ::llvm::cast<VectorType>(getSource().getType()); 628 } 629 VectorType getResultVectorType() { 630 return ::llvm::cast<VectorType>(getRes1().getType()); 631 } 632 }]; 633 } 634 635def Vector_ExtractElementOp : 636 Vector_Op<"extractelement", [Pure, 637 DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>, 638 TypesMatchWith<"result type matches element type of vector operand", 639 "vector", "result", 640 "::llvm::cast<VectorType>($_self).getElementType()">]>, 641 Arguments<(ins AnyVectorOfAnyRank:$vector, 642 Optional<AnySignlessIntegerOrIndex>:$position)>, 643 Results<(outs 
AnyType:$result)> { 644 let summary = "extractelement operation"; 645 let description = [{ 646 Takes a 0-D or 1-D vector and a optional dynamic index position and 647 extracts the scalar at that position. 648 649 Note that this instruction resembles vector.extract, but is restricted to 650 0-D and 1-D vectors and relaxed to dynamic indices. 651 If the vector is 0-D, the position must be std::nullopt. 652 653 654 It is meant to be closer to LLVM's version: 655 https://llvm.org/docs/LangRef.html#extractelement-instruction 656 657 Example: 658 659 ```mlir 660 %c = arith.constant 15 : i32 661 %1 = vector.extractelement %0[%c : i32]: vector<16xf32> 662 %2 = vector.extractelement %z[]: vector<f32> 663 ``` 664 }]; 665 let assemblyFormat = [{ 666 $vector `[` ($position^ `:` type($position))? `]` attr-dict `:` type($vector) 667 }]; 668 669 let builders = [ 670 // 0-D builder. 671 OpBuilder<(ins "Value":$source)>, 672 ]; 673 let extraClassDeclaration = [{ 674 VectorType getSourceVectorType() { 675 return ::llvm::cast<VectorType>(getVector().getType()); 676 } 677 }]; 678 let hasVerifier = 1; 679 let hasFolder = 1; 680} 681 682def Vector_ExtractOp : 683 Vector_Op<"extract", [Pure, 684 DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>, 685 PredOpTrait<"operand and result have same element type", 686 TCresVTEtIsSameAsOpBase<0, 0>>, 687 InferTypeOpAdaptorWithIsCompatible]> { 688 let summary = "extract operation"; 689 let description = [{ 690 Takes an n-D vector and a k-D position and extracts the (n-k)-D vector at 691 the proper position. Degenerates to an element type if n-k is zero. 692 693 Static and dynamic indices must be greater or equal to zero and less than 694 the size of the corresponding dimension. The result is undefined if any 695 index is out-of-bounds. The value `-1` represents a poison index, which 696 specifies that the extracted element is poison. 
697 698 Example: 699 700 ```mlir 701 %1 = vector.extract %0[3]: vector<8x16xf32> from vector<4x8x16xf32> 702 %2 = vector.extract %0[2, 1, 3]: f32 from vector<4x8x16xf32> 703 %3 = vector.extract %1[]: vector<f32> from vector<f32> 704 %4 = vector.extract %0[%a, %b, %c]: f32 from vector<4x8x16xf32> 705 %5 = vector.extract %0[2, %b]: vector<16xf32> from vector<4x8x16xf32> 706 %6 = vector.extract %10[-1, %c]: f32 from vector<4x16xf32> 707 ``` 708 }]; 709 710 let arguments = (ins 711 AnyVectorOfAnyRank:$vector, 712 Variadic<Index>:$dynamic_position, 713 DenseI64ArrayAttr:$static_position 714 ); 715 let results = (outs AnyType:$result); 716 717 let builders = [ 718 OpBuilder<(ins "Value":$source, "int64_t":$position)>, 719 OpBuilder<(ins "Value":$source, "OpFoldResult":$position)>, 720 OpBuilder<(ins "Value":$source, "ArrayRef<int64_t>":$position)>, 721 OpBuilder<(ins "Value":$source, "ArrayRef<OpFoldResult>":$position)>, 722 ]; 723 724 let extraClassDeclaration = extraPoisonClassDeclaration # [{ 725 VectorType getSourceVectorType() { 726 return ::llvm::cast<VectorType>(getVector().getType()); 727 } 728 729 /// Return a vector with all the static and dynamic position indices. 730 SmallVector<OpFoldResult> getMixedPosition() { 731 OpBuilder builder(getContext()); 732 return getMixedValues(getStaticPosition(), getDynamicPosition(), builder); 733 } 734 735 unsigned getNumIndices() { 736 return getStaticPosition().size(); 737 } 738 739 /// Return "true" if the op has at least one dynamic position. 
740 bool hasDynamicPosition() { 741 return !getDynamicPosition().empty(); 742 } 743 }]; 744 745 let assemblyFormat = [{ 746 $vector `` 747 custom<DynamicIndexList>($dynamic_position, $static_position) 748 attr-dict `:` type($result) `from` type($vector) 749 }]; 750 751 let hasCanonicalizer = 1; 752 let hasFolder = 1; 753 let hasVerifier = 1; 754} 755 756def Vector_FMAOp : 757 Op<Vector_Dialect, "fma", [ 758 Pure, AllTypesMatch<["lhs", "rhs", "acc", "result"]>, 759 DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]> 760 ] # ElementwiseMappable.traits>, 761 Arguments<(ins VectorOfAnyRankOf<[AnyFloat]>:$lhs, 762 VectorOfAnyRankOf<[AnyFloat]>:$rhs, 763 VectorOfAnyRankOf<[AnyFloat]>:$acc)>, 764 Results<(outs VectorOfAnyRankOf<[AnyFloat]>:$result)> { 765 let summary = "vector fused multiply-add"; 766 let description = [{ 767 Multiply-add expressions operate on n-D vectors and compute a fused 768 pointwise multiply-and-accumulate: `$result = $lhs * $rhs + $acc`. 769 All operands and result have the same vector type. The semantics 770 of the operation correspond to those of the `llvm.fma` 771 [intrinsic](https://llvm.org/docs/LangRef.html#int-fma). In the 772 particular case of lowering to LLVM, this is guaranteed to lower 773 to the `llvm.fma.*` intrinsic. 
774 775 Example: 776 777 ```mlir 778 %3 = vector.fma %0, %1, %2: vector<8x16xf32> 779 ``` 780 }]; 781 let assemblyFormat = "$lhs `,` $rhs `,` $acc attr-dict `:` type($lhs)"; 782 let extraClassDeclaration = [{ 783 VectorType getVectorType() { return ::llvm::cast<VectorType>(getLhs().getType()); } 784 }]; 785} 786 787def Vector_FromElementsOp : Vector_Op<"from_elements", [ 788 Pure, 789 TypesMatchWith<"operand types match result element type", 790 "result", "elements", "SmallVector<Type>(" 791 "::llvm::cast<VectorType>($_self).getNumElements(), " 792 "::llvm::cast<VectorType>($_self).getElementType())">]> { 793 let summary = "operation that defines a vector from scalar elements"; 794 let description = [{ 795 This operation defines a vector from one or multiple scalar elements. The 796 number of elements must match the number of elements in the result type. 797 All elements must have the same type, which must match the element type of 798 the result vector type. 799 800 `elements` are a flattened version of the result vector in row-major order. 801 802 Example: 803 804 ```mlir 805 // %f1 806 %0 = vector.from_elements %f1 : vector<f32> 807 // [%f1, %f2] 808 %1 = vector.from_elements %f1, %f2 : vector<2xf32> 809 // [[%f1, %f2, %f3], [%f4, %f5, %f6]] 810 %2 = vector.from_elements %f1, %f2, %f3, %f4, %f5, %f6 : vector<2x3xf32> 811 // [[[%f1, %f2]], [[%f3, %f4]], [[%f5, %f6]]] 812 %3 = vector.from_elements %f1, %f2, %f3, %f4, %f5, %f6 : vector<3x1x2xf32> 813 ``` 814 815 Note, scalable vectors are not supported. 
  }];

  let arguments = (ins Variadic<AnyType>:$elements);
  let results = (outs AnyFixedVectorOfAnyRank:$result);
  let assemblyFormat = "$elements attr-dict `:` type($result)";
  let hasCanonicalizer = 1;
}

def Vector_InsertElementOp :
  Vector_Op<"insertelement", [Pure,
    DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
    TypesMatchWith<"source operand type matches element type of result",
                   "result", "source",
                   "::llvm::cast<VectorType>($_self).getElementType()">,
    AllTypesMatch<["dest", "result"]>]>,
  Arguments<(ins AnyType:$source, AnyVectorOfAnyRank:$dest,
                 Optional<AnySignlessIntegerOrIndex>:$position)>,
  Results<(outs AnyVectorOfAnyRank:$result)> {
  let summary = "insertelement operation";
  let description = [{
    Takes a scalar source, a 0-D or 1-D destination vector and a dynamic index
    position and inserts the source into the destination at the proper position.

    Note that this instruction resembles vector.insert, but is restricted to 0-D
    and 1-D vectors and relaxed to dynamic indices.

    It is meant to be closer to LLVM's version:
    https://llvm.org/docs/LangRef.html#insertelement-instruction

    Example:

    ```mlir
    %c = arith.constant 15 : i32
    %f = arith.constant 0.0 : f32
    %1 = vector.insertelement %f, %0[%c : i32]: vector<16xf32>
    %2 = vector.insertelement %f, %z[]: vector<f32>
    ```
  }];
  let assemblyFormat = [{
    $source `,` $dest `[` ($position^ `:` type($position))? `]` attr-dict `:`
    type($result)
  }];

  let builders = [
    // 0-D builder.
861 OpBuilder<(ins "Value":$source, "Value":$dest)>, 862 ]; 863 let extraClassDeclaration = [{ 864 Type getSourceType() { return getSource().getType(); } 865 VectorType getDestVectorType() { 866 return ::llvm::cast<VectorType>(getDest().getType()); 867 } 868 }]; 869 let hasVerifier = 1; 870 let hasFolder = 1; 871} 872 873def Vector_InsertOp : 874 Vector_Op<"insert", [Pure, 875 DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>, 876 PredOpTrait<"source operand and result have same element type", 877 TCresVTEtIsSameAsOpBase<0, 0>>, 878 AllTypesMatch<["dest", "result"]>]> { 879 let summary = "insert operation"; 880 let description = [{ 881 Takes an n-D source vector, an (n+k)-D destination vector and a k-D position 882 and inserts the n-D source into the (n+k)-D destination at the proper 883 position. Degenerates to a scalar or a 0-d vector source type when n = 0. 884 885 Static and dynamic indices must be greater or equal to zero and less than 886 the size of the corresponding dimension. The result is undefined if any 887 index is out-of-bounds. The value `-1` represents a poison index, which 888 specifies that the resulting vector is poison. 
889 890 Example: 891 892 ```mlir 893 %2 = vector.insert %0, %1[3] : vector<8x16xf32> into vector<4x8x16xf32> 894 %5 = vector.insert %3, %4[2, 1, 3] : f32 into vector<4x8x16xf32> 895 %8 = vector.insert %6, %7[] : f32 into vector<f32> 896 %11 = vector.insert %9, %10[%a, %b, %c] : vector<f32> into vector<4x8x16xf32> 897 %12 = vector.insert %4, %10[2, %b] : vector<16xf32> into vector<4x8x16xf32> 898 %13 = vector.insert %20, %1[-1, %c] : f32 into vector<4x16xf32> 899 ``` 900 }]; 901 902 let arguments = (ins 903 AnyType:$source, 904 AnyVectorOfAnyRank:$dest, 905 Variadic<Index>:$dynamic_position, 906 DenseI64ArrayAttr:$static_position 907 ); 908 let results = (outs AnyVectorOfAnyRank:$result); 909 910 let builders = [ 911 OpBuilder<(ins "Value":$source, "Value":$dest, "int64_t":$position)>, 912 OpBuilder<(ins "Value":$source, "Value":$dest, "OpFoldResult":$position)>, 913 OpBuilder<(ins "Value":$source, "Value":$dest, "ArrayRef<int64_t>":$position)>, 914 OpBuilder<(ins "Value":$source, "Value":$dest, "ArrayRef<OpFoldResult>":$position)>, 915 ]; 916 917 let extraClassDeclaration = extraPoisonClassDeclaration # [{ 918 Type getSourceType() { return getSource().getType(); } 919 VectorType getDestVectorType() { 920 return ::llvm::cast<VectorType>(getDest().getType()); 921 } 922 923 /// Return a vector with all the static and dynamic position indices. 
924 SmallVector<OpFoldResult> getMixedPosition() { 925 OpBuilder builder(getContext()); 926 return getMixedValues(getStaticPosition(), getDynamicPosition(), builder); 927 } 928 929 unsigned getNumIndices() { 930 return getStaticPosition().size(); 931 } 932 933 bool hasDynamicPosition() { 934 return llvm::any_of(getDynamicPosition(), 935 [](Value operand) { return operand != nullptr; }); 936 } 937 }]; 938 939 let assemblyFormat = [{ 940 $source `,` $dest custom<DynamicIndexList>($dynamic_position, $static_position) 941 attr-dict `:` type($source) `into` type($dest) 942 }]; 943 944 let hasCanonicalizer = 1; 945 let hasFolder = 1; 946 let hasVerifier = 1; 947} 948 949def Vector_ScalableInsertOp : 950 Vector_Op<"scalable.insert", [Pure, 951 AllElementTypesMatch<["source", "dest"]>, 952 AllTypesMatch<["dest", "res"]>, 953 PredOpTrait<"position is a multiple of the source length.", 954 CPred< 955 "(getPos() % getSourceVectorType().getNumElements()) == 0" 956 >>]>, 957 Arguments<(ins VectorOfRank<[1]>:$source, 958 ScalableVectorOfRank<[1]>:$dest, 959 I64Attr:$pos)>, 960 Results<(outs ScalableVectorOfRank<[1]>:$res)> { 961 let summary = "insert subvector into scalable vector operation"; 962 // NOTE: This operation is designed to map to `llvm.vector.insert`, and its 963 // documentation should be kept aligned with LLVM IR: 964 // https://llvm.org/docs/LangRef.html#llvm-vector-insert-intrinsic 965 let description = [{ 966 This operation takes a rank-1 fixed-length or scalable subvector and 967 inserts it within the destination scalable vector starting from the 968 position specified by `pos`. If the source vector is scalable, the 969 insertion position will be scaled by the runtime scaling factor of the 970 source subvector. 971 972 The insertion position must be a multiple of the minimum size of the source 973 vector. For the operation to be well defined, the source vector must fit in 974 the destination vector from the specified position.
Since the destination 975 vector is scalable and its runtime length is unknown, the validity of the 976 operation can't be verified nor guaranteed at compile time. 977 978 Example: 979 980 ```mlir 981 %2 = vector.scalable.insert %0, %1[8] : vector<4xf32> into vector<[16]xf32> 982 %5 = vector.scalable.insert %3, %4[0] : vector<8xf32> into vector<[4]xf32> 983 %8 = vector.scalable.insert %6, %7[0] : vector<[4]xf32> into vector<[8]xf32> 984 ``` 985 986 Invalid example: 987 ```mlir 988 %2 = vector.scalable.insert %0, %1[5] : vector<4xf32> into vector<[16]xf32> 989 ``` 990 }]; 991 992 let assemblyFormat = [{ 993 $source `,` $dest `[` $pos `]` attr-dict `:` type($source) `into` type($dest) 994 }]; 995 996 let extraClassDeclaration = extraPoisonClassDeclaration # [{ 997 VectorType getSourceVectorType() { 998 return ::llvm::cast<VectorType>(getSource().getType()); 999 } 1000 VectorType getDestVectorType() { 1001 return ::llvm::cast<VectorType>(getDest().getType()); 1002 } 1003 }]; 1004} 1005 1006def Vector_ScalableExtractOp : 1007 Vector_Op<"scalable.extract", [Pure, 1008 AllElementTypesMatch<["source", "res"]>, 1009 PredOpTrait<"position is a multiple of the result length.", 1010 CPred< 1011 "(getPos() % getResultVectorType().getNumElements()) == 0" 1012 >>]>, 1013 Arguments<(ins ScalableVectorOfRank<[1]>:$source, 1014 I64Attr:$pos)>, 1015 Results<(outs VectorOfRank<[1]>:$res)> { 1016 let summary = "extract subvector from scalable vector operation"; 1017 // NOTE: This operation is designed to map to `llvm.vector.extract`, and its 1018 // documentation should be kept aligned with LLVM IR: 1019 // https://llvm.org/docs/LangRef.html#llvm-vector-extract-intrinsic 1020 let description = [{ 1021 Takes a rank-1 source vector and a position `pos` within the source 1022 vector, and extracts a subvector starting from that position. 1023 1024 The extraction position must be a multiple of the minimum size of the result 1025 vector.
For the operation to be well defined, the destination vector must 1026 fit within the source vector from the specified position. Since the source 1027 vector is scalable and its runtime length is unknown, the validity of the 1028 operation can't be verified nor guaranteed at compile time. 1029 1030 Example: 1031 1032 ```mlir 1033 %1 = vector.scalable.extract %0[8] : vector<4xf32> from vector<[8]xf32> 1034 %3 = vector.scalable.extract %2[0] : vector<[4]xf32> from vector<[8]xf32> 1035 ``` 1036 1037 Invalid example: 1038 ```mlir 1039 %1 = vector.scalable.extract %0[5] : vector<4xf32> from vector<[16]xf32> 1040 ``` 1041 }]; 1042 1043 let assemblyFormat = [{ 1044 $source `[` $pos `]` attr-dict `:` type($res) `from` type($source) 1045 }]; 1046 1047 let extraClassDeclaration = extraPoisonClassDeclaration # [{ 1048 VectorType getSourceVectorType() { 1049 return ::llvm::cast<VectorType>(getSource().getType()); 1050 } 1051 VectorType getResultVectorType() { 1052 return ::llvm::cast<VectorType>(getRes().getType()); 1053 } 1054 }]; 1055} 1056 1057def Vector_InsertStridedSliceOp : 1058 Vector_Op<"insert_strided_slice", [Pure, 1059 PredOpTrait<"operand #0 and result have same element type", 1060 TCresVTEtIsSameAsOpBase<0, 0>>, 1061 AllTypesMatch<["dest", "res"]>]>, 1062 Arguments<(ins AnyVectorOfNonZeroRank:$source, AnyVectorOfNonZeroRank:$dest, I64ArrayAttr:$offsets, 1063 I64ArrayAttr:$strides)>, 1064 Results<(outs AnyVectorOfNonZeroRank:$res)> { 1065 let summary = "strided_slice operation"; 1066 let description = [{ 1067 Takes a k-D source vector, an n-D destination vector (n >= k), n-sized 1068 `offsets` integer array attribute, a k-sized `strides` integer array attribute 1069 and inserts the k-D source vector as a strided subvector at the proper offset 1070 into the n-D destination vector. 1071 1072 At the moment strides must contain only 1s. 
1073 1074 Returns an n-D vector that is a copy of the n-D destination vector in which 1075 the last k-D dimensions contain the k-D source vector elements strided at 1076 the proper location as specified by the offsets. 1077 1078 Example: 1079 1080 ```mlir 1081 %2 = vector.insert_strided_slice %0, %1 1082 {offsets = [0, 0, 2], strides = [1, 1]}: 1083 vector<2x4xf32> into vector<16x4x8xf32> 1084 ``` 1085 }]; 1086 1087 let assemblyFormat = [{ 1088 $source `,` $dest attr-dict `:` type($source) `into` type($dest) 1089 }]; 1090 1091 let builders = [ 1092 OpBuilder<(ins "Value":$source, "Value":$dest, 1093 "ArrayRef<int64_t>":$offsets, "ArrayRef<int64_t>":$strides)> 1094 ]; 1095 let extraClassDeclaration = [{ 1096 VectorType getSourceVectorType() { 1097 return ::llvm::cast<VectorType>(getSource().getType()); 1098 } 1099 VectorType getDestVectorType() { 1100 return ::llvm::cast<VectorType>(getDest().getType()); 1101 } 1102 bool hasNonUnitStrides() { 1103 return llvm::any_of(getStrides(), [](Attribute attr) { 1104 return ::llvm::cast<IntegerAttr>(attr).getInt() != 1; 1105 }); 1106 } 1107 }]; 1108 1109 let hasFolder = 1; 1110 let hasVerifier = 1; 1111 let hasCanonicalizer = 1; 1112} 1113 1114def Vector_OuterProductOp : 1115 Vector_Op<"outerproduct", [Pure, 1116 PredOpTrait<"lhs operand and result have same element type", 1117 TCresVTEtIsSameAsOpBase<0, 0>>, 1118 PredOpTrait<"rhs operand and result have same element type", 1119 TCresVTEtIsSameAsOpBase<0, 1>>, 1120 DeclareOpInterfaceMethods<MaskableOpInterface>]>, 1121 Arguments<(ins AnyVectorOfNonZeroRank:$lhs, AnyType:$rhs, 1122 Optional<AnyVectorOfNonZeroRank>:$acc, 1123 DefaultValuedAttr<Vector_CombiningKindAttr, "CombiningKind::ADD">:$kind)>, 1124 Results<(outs AnyVectorOfNonZeroRank)> { 1125 let summary = "vector outerproduct with optional fused add"; 1126 let description = [{ 1127 Takes 2 1-D vectors and returns the 2-D vector containing the outer-product, 1128 as illustrated below: 1129 ``` 1130 outer | [c, d] 1131 
------+------------ 1132 [a, | [ [a*c, a*d], 1133 b] | [b*c, b*d] ] 1134 ``` 1135 This operation also accepts a 1-D vector lhs and a scalar rhs. In this 1136 case a simple AXPY operation is performed, which returns a 1-D vector. 1137 ``` 1138 [a, b] * c = [a*c, b*c] 1139 ``` 1140 1141 An optional extra vector argument with the same shape as the output 1142 vector may be specified in which case the operation returns the sum of 1143 the outer-product and the extra vector. In this multiply-accumulate 1144 scenario for floating-point arguments, the rounding mode is enforced 1145 by guaranteeing that a fused-multiply add operation is emitted. When 1146 lowered to the LLVMIR dialect, this form emits `llvm.intr.fma`, which 1147 is guaranteed to lower to actual `fma` instructions on x86. 1148 1149 An optional kind attribute may be specified to be: `add`/`mul`/`minsi` 1150 /`minui`/`maxsi`/`maxui`/`and`/`or`/`xor` for integers, and `add`/`mul` 1151 /`minnumf`/`maxnumf`/`minimumf`/`maximumf` for floats. The default is 1152 `add`. 1153 1154 Example: 1155 1156 ``` 1157 %2 = vector.outerproduct %0, %1: vector<4xf32>, vector<8xf32> 1158 return %2: vector<4x8xf32> 1159 1160 %3 = vector.outerproduct %0, %1, %2: 1161 vector<4xf32>, vector<8xf32>, vector<4x8xf32> 1162 return %3: vector<4x8xf32> 1163 1164 %4 = vector.outerproduct %0, %1, %2 {kind = #vector.kind<maxnumf>}: 1165 vector<4xf32>, vector<8xf32>, vector<4x8xf32> 1166 return %4: vector<4x8xf32> 1167 1168 %6 = vector.outerproduct %4, %5: vector<10xf32>, f32 1169 return %6: vector<10xf32> 1170 1171 ``` 1172 }]; 1173 let builders = [ 1174 // Build an op without mask, use the type of `acc` as the return type.
1175 OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc)> 1176 ]; 1177 let extraClassDeclaration = [{ 1178 VectorType getOperandVectorTypeLHS() { 1179 return ::llvm::cast<VectorType>(getLhs().getType()); 1180 } 1181 Type getOperandTypeRHS() { 1182 return getRhs().getType(); 1183 } 1184 VectorType getOperandVectorTypeACC() { 1185 return getAcc() 1186 ? ::llvm::cast<VectorType>(getAcc().getType()) 1187 : VectorType(); 1188 } 1189 VectorType getResultVectorType() { 1190 return ::llvm::cast<VectorType>(getResult().getType()); 1191 } 1192 static CombiningKind getDefaultKind() { 1193 return CombiningKind::ADD; 1194 } 1195 }]; 1196 let hasCustomAssemblyFormat = 1; 1197 let hasVerifier = 1; 1198} 1199 1200def Vector_ExtractStridedSliceOp : 1201 Vector_Op<"extract_strided_slice", [Pure, 1202 PredOpTrait<"operand and result have same element type", 1203 TCresVTEtIsSameAsOpBase<0, 0>>]>, 1204 Arguments<(ins AnyVectorOfNonZeroRank:$vector, I64ArrayAttr:$offsets, 1205 I64ArrayAttr:$sizes, I64ArrayAttr:$strides)>, 1206 Results<(outs AnyVectorOfNonZeroRank)> { 1207 let summary = "extract_strided_slice operation"; 1208 let description = [{ 1209 Takes an n-D vector, k-D `offsets` integer array attribute, a k-sized 1210 `sizes` integer array attribute, a k-sized `strides` integer array 1211 attribute and extracts the n-D subvector at the proper offset. 1212 1213 At the moment strides must contain only 1s. 1214 1215 Returns an n-D vector where the first k-D dimensions match the `sizes` 1216 attribute. The returned subvector contains the elements starting at offset 1217 `offsets` and ending at `offsets + sizes`. 
1218 1219 Example: 1220 1221 ```mlir 1222 %1 = vector.extract_strided_slice %0 1223 {offsets = [0, 2], sizes = [2, 4], strides = [1, 1]}: 1224 vector<4x8x16xf32> to vector<2x4x16xf32> 1225 1226 // TODO: Evolve to a range form syntax similar to: 1227 %1 = vector.extract_strided_slice %0[0:2:1][2:4:1] 1228 vector<4x8x16xf32> to vector<2x4x16xf32> 1229 ``` 1230 1231 TODO: Implement support for poison indices. 1232 }]; 1233 let builders = [ 1234 OpBuilder<(ins "Value":$source, "ArrayRef<int64_t>":$offsets, 1235 "ArrayRef<int64_t>":$sizes, "ArrayRef<int64_t>":$strides)> 1236 ]; 1237 let extraClassDeclaration = [{ 1238 VectorType getSourceVectorType() { 1239 return ::llvm::cast<VectorType>(getVector().getType()); 1240 } 1241 void getOffsets(SmallVectorImpl<int64_t> &results); 1242 bool hasNonUnitStrides() { 1243 return llvm::any_of(getStrides(), [](Attribute attr) { 1244 return ::llvm::cast<IntegerAttr>(attr).getInt() != 1; 1245 }); 1246 } 1247 }]; 1248 let hasCanonicalizer = 1; 1249 let hasFolder = 1; 1250 let hasVerifier = 1; 1251 let assemblyFormat = "$vector attr-dict `:` type($vector) `to` type(results)"; 1252} 1253 1254// TODO: Tighten semantics so that masks and inbounds can't be used 1255// simultaneously within the same transfer op. 
1256def Vector_TransferReadOp : 1257 Vector_Op<"transfer_read", [ 1258 DeclareOpInterfaceMethods<VectorTransferOpInterface>, 1259 DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>, 1260 DeclareOpInterfaceMethods<MaskableOpInterface>, 1261 DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 1262 DeclareOpInterfaceMethods<ConditionallySpeculatable>, 1263 AttrSizedOperandSegments, 1264 DestinationStyleOpInterface 1265 ]>, 1266 Arguments<(ins AnyShaped:$source, 1267 Variadic<Index>:$indices, 1268 AffineMapAttr:$permutation_map, 1269 AnyType:$padding, 1270 Optional<VectorOfNonZeroRankOf<[I1]>>:$mask, 1271 BoolArrayAttr:$in_bounds)>, 1272 Results<(outs AnyVectorOfAnyRank:$vector)> { 1273 1274 let summary = "Reads a supervector from memory into an SSA vector value."; 1275 1276 let description = [{ 1277 The `vector.transfer_read` op performs a read from a slice within a 1278 [MemRef](../LangRef.md#memref-type) or a Ranked 1279 [Tensor](../LangRef.md#tensor-type) supplied as its first operand 1280 into a [vector](../LangRef.md#vector-type) of the same base elemental type. 1281 1282 A memref/tensor operand with vector element type, must have its vector 1283 element type match a suffix (shape and element type) of the vector (e.g. 1284 memref<3x2x6x4x3xf32>, vector<1x1x4x3xf32>). 1285 1286 The slice is further defined by a full-rank index within the MemRef/Tensor, 1287 supplied as the operands `[1 .. 1 + rank(memref/tensor))` that defines the 1288 starting point of the transfer (e.g. `%A[%i0, %i1, %i2]`). 1289 1290 The permutation_map [attribute](../LangRef.md#attributes) is an 1291 [affine-map](Affine.md#affine-maps) which specifies the transposition on the 1292 slice to match the vector shape. The permutation map may be implicit and 1293 omitted from parsing and printing if it is the canonical minor identity map 1294 (i.e. if it does not permute or broadcast any dimension). 
1295 1296 The size of the slice is specified by the size of the vector, given as the 1297 return type. 1298 1299 An SSA value `padding` of the same elemental type as the MemRef/Tensor is 1300 provided to specify a fallback value in the case of out-of-bounds accesses 1301 and/or masking. 1302 1303 An optional SSA value `mask` may be specified to mask out elements read from 1304 the MemRef/Tensor. The `mask` type is an `i1` vector with a shape that 1305 matches how elements are read from the MemRef/Tensor, *before* any 1306 permutation or broadcasting. Elements whose corresponding mask element is 1307 `0` are masked out and replaced with `padding`. 1308 1309 For every vector dimension, the boolean array attribute `in_bounds` 1310 specifies if the transfer is guaranteed to be within the source bounds. If 1311 set to "false", accesses (including the starting point) may run 1312 out-of-bounds along the respective vector dimension as the index increases. 1313 Non-vector dimensions *must* always be in-bounds. The `in_bounds` array 1314 length has to be equal to the vector rank. This attribute has a default 1315 value: `false` (i.e. "out-of-bounds"). When skipped in the textual IR, the 1316 default value is assumed. Similarly, the OP printer will omit this 1317 attribute when all dimensions are out-of-bounds (i.e. the default value is 1318 used). 1319 1320 A `vector.transfer_read` can be lowered to a simple load if all dimensions 1321 are specified to be within bounds and no `mask` was specified. 1322 1323 This operation is called 'read' by opposition to 'load' because the 1324 super-vector granularity is generally not representable with a single 1325 hardware register. A `vector.transfer_read` is thus a mid-level abstraction 1326 that supports super-vectorization with non-effecting padding for full-tile 1327 only operations. 
1328 1329 More precisely, let's dive deeper into the permutation_map for the following 1330 MLIR: 1331 1332 ```mlir 1333 vector.transfer_read %A[%expr1, %expr2, %expr3, %expr4] 1334 { permutation_map : (d0,d1,d2,d3) -> (d2,0,d0) } : 1335 memref<?x?x?x?xf32>, vector<3x4x5xf32> 1336 ``` 1337 1338 This operation always reads a slice starting at `%A[%expr1, %expr2, %expr3, 1339 %expr4]`. The size of the slice can be inferred from the resulting vector 1340 shape and walking back through the permutation map: 3 along d2 and 5 along 1341 d0, so the slice is: `%A[%expr1 : %expr1 + 5, %expr2, %expr3:%expr3 + 3, %expr4]` 1342 1343 That slice needs to be read into a `vector<3x4x5xf32>`. Since the 1344 permutation map is not full rank, there must be a broadcast along vector 1345 dimension `1`. 1346 1347 A notional lowering of vector.transfer_read could generate code resembling: 1348 1349 ```mlir 1350 // %expr1, %expr2, %expr3, %expr4 defined before this point 1351 // alloc a temporary buffer for performing the "gather" of the slice. 1352 %tmp = memref.alloc() : memref<vector<3x4x5xf32>> 1353 for %i = 0 to 3 { 1354 affine.for %j = 0 to 4 { 1355 affine.for %k = 0 to 5 { 1356 // Note that this load does not involve %j. 1357 %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32> 1358 // Update the temporary gathered slice with the individual element 1359 %slice = memref.load %tmp : memref<vector<3x4x5xf32>> -> vector<3x4x5xf32> 1360 %updated = vector.insert %a, %slice[%i, %j, %k] : f32 into vector<3x4x5xf32> 1361 memref.store %updated, %tmp : memref<vector<3x4x5xf32>> 1362 }}} 1363 // At this point we gathered the elements from the original 1364 // memref into the desired vector layout, stored in the `%tmp` allocation. 1365 %vec = memref.load %tmp : memref<vector<3x4x5xf32>> -> vector<3x4x5xf32> 1366 ``` 1367 1368 On a GPU one could then map `i`, `j`, `k` to blocks and threads. 
Notice that 1369 the temporary storage footprint could conceptually be only `3 * 5` values but 1370 `3 * 4 * 5` values are actually transferred between `%A` and `%tmp`. 1371 1372 Alternatively, if a notional vector broadcast operation were available, we 1373 could avoid the loop on `%j` and the lowered code would resemble: 1374 1375 ```mlir 1376 // %expr1, %expr2, %expr3, %expr4 defined before this point 1377 %tmp = memref.alloc() : memref<vector<3x4x5xf32>> 1378 for %i = 0 to 3 { 1379 affine.for %k = 0 to 5 { 1380 %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32> 1381 %slice = memref.load %tmp : memref<vector<3x4x5xf32>> -> vector<3x4x5xf32> 1382 // Here we only store to the first element in dimension one 1383 %updated = vector.insert %a, %slice[%i, 0, %k] : f32 into vector<3x4x5xf32> 1384 memref.store %updated, %tmp : memref<vector<3x4x5xf32>> 1385 }} 1386 // At this point we gathered the elements from the original 1387 // memref into the desired vector layout, stored in the `%tmp` allocation. 1388 // However we haven't replicated them alongside the first dimension, we need 1389 // to broadcast now. 1390 %partialVec = load %tmp : memref<vector<3x4x5xf32>> -> vector<3x4x5xf32> 1391 %vec = broadcast %tmpvec, 1 : vector<3x4x5xf32> 1392 ``` 1393 1394 where `broadcast` broadcasts from element 0 to all others along the 1395 specified dimension. This time, the number of loaded element is `3 * 5` 1396 values. 1397 An additional `1` broadcast is required. On a GPU this broadcast could be 1398 implemented using a warp-shuffle if loop `j` were mapped to `threadIdx.x`. 
1399 1400 Syntax 1401 ``` 1402 operation ::= ssa-id `=` `vector.transfer_read` ssa-use-list 1403 `{` attribute-entry `} :` memref-type `,` vector-type 1404 ``` 1405 1406 Example: 1407 1408 ```mlir 1409 // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32> 1410 // and pad with %f0 to handle the boundary case: 1411 %f0 = arith.constant 0.0f : f32 1412 affine.for %i0 = 0 to %0 { 1413 affine.for %i1 = 0 to %1 step 256 { 1414 affine.for %i2 = 0 to %2 step 32 { 1415 %v = vector.transfer_read %A[%i0, %i1, %i2], (%f0) 1416 {permutation_map: (d0, d1, d2) -> (d2, d1)} : 1417 memref<?x?x?xf32>, vector<32x256xf32> 1418 }}} 1419 1420 // or equivalently (rewrite with vector.transpose) 1421 %f0 = arith.constant 0.0f : f32 1422 affine.for %i0 = 0 to %0 { 1423 affine.for %i1 = 0 to %1 step 256 { 1424 affine.for %i2 = 0 to %2 step 32 { 1425 %v0 = vector.transfer_read %A[%i0, %i1, %i2], (%f0) 1426 {permutation_map: (d0, d1, d2) -> (d1, d2)} : 1427 memref<?x?x?xf32>, vector<256x32xf32> 1428 %v = vector.transpose %v0, [1, 0] : 1429 vector<256x32xf32> to vector<32x256xf32> 1430 }}} 1431 1432 // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into 1433 // vector<128xf32>. The underlying implementation will require a 1-D vector 1434 // broadcast: 1435 affine.for %i0 = 0 to %0 { 1436 affine.for %i1 = 0 to %1 { 1437 %3 = vector.transfer_read %A[%i0, %i1] 1438 {permutation_map: (d0, d1) -> (0)} : 1439 memref<?x?xf32>, vector<128xf32> 1440 } 1441 } 1442 1443 // Read from a memref with vector element type. 1444 %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 1445 {permutation_map = (d0, d1)->(d0, d1)} 1446 : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32> 1447 1448 // Read from a tensor with vector element type.
1449 %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 1450 {permutation_map = (d0, d1)->(d0, d1)} 1451 : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32> 1452 1453 // Special encoding for 0-d transfer with 0-d tensor/memref, vector shape 1454 // {1} and permutation_map () -> (0). 1455 %0 = vector.transfer_read %arg0[], %f0 {permutation_map = affine_map<()->(0)>} : 1456 tensor<f32>, vector<1xf32> 1457 ``` 1458 }]; 1459 1460 let builders = [ 1461 /// 1. Builder that sets padding to zero and an empty mask (variant with attrs). 1462 OpBuilder<(ins "VectorType":$vectorType, 1463 "Value":$source, 1464 "ValueRange":$indices, 1465 "AffineMapAttr":$permutationMapAttr, 1466 "ArrayAttr":$inBoundsAttr)>, 1467 /// 2. Builder that sets padding to zero and an empty mask (variant without attrs). 1468 OpBuilder<(ins "VectorType":$vectorType, 1469 "Value":$source, 1470 "ValueRange":$indices, 1471 "AffineMap":$permutationMap, 1472 CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>, 1473 /// 3. Builder that sets permutation map to 'getMinorIdentityMap'. 1474 OpBuilder<(ins "VectorType":$vectorType, 1475 "Value":$source, 1476 "ValueRange":$indices, 1477 "Value":$padding, 1478 CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>, 1479 /// 4. Builder that sets padding to zero and permutation map to 1480 /// 'getMinorIdentityMap'. 1481 OpBuilder<(ins "VectorType":$vectorType, 1482 "Value":$source, 1483 "ValueRange":$indices, 1484 CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>, 1485 ]; 1486 1487 let extraClassDeclaration = [{ 1488 // MaskableOpInterface methods. 
1489 bool supportsPassthru() { return true; } 1490 1491 MutableOperandRange getDpsInitsMutable() { 1492 return MutableOperandRange(getOperation(), /*start=*/0, /*length=*/0); 1493 } 1494 }]; 1495 1496 let hasCanonicalizer = 1; 1497 let hasCustomAssemblyFormat = 1; 1498 let hasFolder = 1; 1499 let hasVerifier = 1; 1500} 1501 1502// TODO: Tighten semantics so that masks and inbounds can't be used 1503// simultaneously within the same transfer op. 1504def Vector_TransferWriteOp : 1505 Vector_Op<"transfer_write", [ 1506 DeclareOpInterfaceMethods<VectorTransferOpInterface>, 1507 DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>, 1508 DeclareOpInterfaceMethods<MaskableOpInterface>, 1509 DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, 1510 DeclareOpInterfaceMethods<ConditionallySpeculatable>, 1511 AttrSizedOperandSegments, 1512 DestinationStyleOpInterface 1513 ]>, 1514 Arguments<(ins AnyVectorOfAnyRank:$vector, 1515 AnyShaped:$source, 1516 Variadic<Index>:$indices, 1517 AffineMapAttr:$permutation_map, 1518 Optional<VectorOfNonZeroRankOf<[I1]>>:$mask, 1519 BoolArrayAttr:$in_bounds)>, 1520 Results<(outs Optional<AnyRankedTensor>:$result)> { 1521 1522 let summary = "The vector.transfer_write op writes a supervector to memory."; 1523 1524 let description = [{ 1525 The `vector.transfer_write` op performs a write from a 1526 [vector](../LangRef.md#vector-type), supplied as its first operand, into a 1527 slice within a [MemRef](../LangRef.md#memref-type) or a Ranked 1528 [Tensor](../LangRef.md#tensor-type) of the same base elemental type, 1529 supplied as its second operand. 1530 1531 A vector memref/tensor operand must have its vector element type match a 1532 suffix (shape and element type) of the vector (e.g. memref<3x2x6x4x3xf32>, 1533 vector<1x1x4x3xf32>). If the operand is a tensor, the operation returns a 1534 new tensor of the same type. 
1535 1536 The slice is further defined by a full-rank index within the MemRef/Tensor, 1537 supplied as the operands `[2 .. 2 + rank(memref/tensor))` that defines the 1538 starting point of the transfer (e.g. `%A[%i0, %i1, %i2, %i3]`). 1539 1540 The permutation_map [attribute](../LangRef.md#attributes) is an 1541 [affine-map](Affine.md#affine-maps) which specifies the transposition on the 1542 slice to match the vector shape. The permutation map may be implicit and 1543 omitted from parsing and printing if it is the canonical minor identity map 1544 (i.e. if it does not permute any dimension). In contrast to `transfer_read`, 1545 write ops cannot have broadcast dimensions. 1546 1547 The size of the slice is specified by the size of the vector. 1548 1549 An optional SSA value `mask` may be specified to mask out elements written 1550 to the MemRef/Tensor. The `mask` type is an `i1` vector with a shape that 1551 matches how elements are written into the MemRef/Tensor, *after* applying 1552 any permutation. Elements whose corresponding mask element is `0` are 1553 masked out. 1554 1555 For every vector dimension, the boolean array attribute `in_bounds` 1556 specifies if the transfer is guaranteed to be within the source bounds. If 1557 set to "false", accesses (including the starting point) may run 1558 out-of-bounds along the respective vector dimension as the index increases. 1559 Non-vector dimensions *must* always be in-bounds. The `in_bounds` array 1560 length has to be equal to the vector rank. This attribute has a default 1561 value: `false` (i.e. "out-of-bounds"). When skipped in the textual IR, the 1562 default value is assumed. Similarly, the OP printer will omit this 1563 attribute when all dimensions are out-of-bounds (i.e. the default value is 1564 used). 1565 1566 A `vector.transfer_write` can be lowered to a simple store if all 1567 dimensions are specified to be within bounds and no `mask` was specified. 
1568 1569 This operation is called 'write' by opposition to 'store' because the 1570 super-vector granularity is generally not representable with a single 1571 hardware register. A `vector.transfer_write` is thus a 1572 mid-level abstraction that supports super-vectorization with non-effecting 1573 padding for full-tile-only code. It is the responsibility of 1574 `vector.transfer_write`'s implementation to ensure the memory writes are 1575 valid. Different lowerings may be pertinent depending on the hardware 1576 support. 1577 1578 Example: 1579 1580 ```mlir 1581 // write vector<16x32x64xf32> into the slice 1582 // `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`: 1583 for %i0 = 0 to %0 { 1584 affine.for %i1 = 0 to %1 step 32 { 1585 affine.for %i2 = 0 to %2 step 64 { 1586 affine.for %i3 = 0 to %3 step 16 { 1587 %val = `ssa-value` : vector<16x32x64xf32> 1588 vector.transfer_write %val, %A[%i0, %i1, %i2, %i3] 1589 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} : 1590 vector<16x32x64xf32>, memref<?x?x?x?xf32> 1591 }}}} 1592 1593 // or equivalently (rewrite with vector.transpose) 1594 for %i0 = 0 to %0 { 1595 affine.for %i1 = 0 to %1 step 32 { 1596 affine.for %i2 = 0 to %2 step 64 { 1597 affine.for %i3 = 0 to %3 step 16 { 1598 %val = `ssa-value` : vector<16x32x64xf32> 1599 %valt = vector.transpose %val, [1, 2, 0] : 1600 vector<16x32x64xf32> -> vector<32x64x16xf32> 1601 vector.transfer_write %valt, %A[%i0, %i1, %i2, %i3] 1602 {permutation_map: (d0, d1, d2, d3) -> (d1, d2, d3)} : 1603 vector<32x64x16xf32>, memref<?x?x?x?xf32> 1604 }}}} 1605 1606 // write to a memref with vector element type. 1607 vector.transfer_write %4, %arg1[%c3, %c3] 1608 {permutation_map = (d0, d1)->(d0, d1)} 1609 : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>> 1610 1611 // return a tensor where the vector is inserted into the source tensor. 
1612 %5 = vector.transfer_write %4, %arg1[%c3, %c3] 1613 {permutation_map = (d0, d1)->(d0, d1)} 1614 : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>> 1615 1616 // Special encoding for 0-d transfer with 0-d tensor/memref, vector shape 1617 // {1} and permutation_map () -> (0). 1618 %1 = vector.transfer_write %0, %arg0[] {permutation_map = affine_map<()->(0)>} : 1619 vector<1xf32>, tensor<f32> 1620 ``` 1621 }]; 1622 1623 let builders = [ 1624 /// 1. Builder with type inference. 1625 OpBuilder<(ins "Value":$vector, 1626 "Value":$dest, 1627 "ValueRange":$indices, 1628 "AffineMapAttr":$permutationMapAttr, 1629 "Value":$mask, 1630 "ArrayAttr":$inBoundsAttr)>, 1631 /// 2. Builder with type inference that sets an empty mask (variant with attrs). 1632 OpBuilder<(ins "Value":$vector, 1633 "Value":$dest, 1634 "ValueRange":$indices, 1635 "AffineMapAttr":$permutationMapAttr, 1636 "ArrayAttr":$inBoundsAttr)>, 1637 /// 3. Builder with type inference that sets an empty mask (variant without attrs). 1638 OpBuilder<(ins "Value":$vector, 1639 "Value":$dest, 1640 "ValueRange":$indices, 1641 "AffineMap":$permutationMap, 1642 CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>, 1643 /// 4. Builder with type inference that sets an empty mask and sets permutation 1644 /// map to 'getMinorIdentityMap'. 1645 OpBuilder<(ins "Value":$vector, 1646 "Value":$dest, 1647 "ValueRange":$indices, 1648 CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>, 1649 ]; 1650 1651 let extraClassDeclaration = [{ 1652 /// This method is added to maintain uniformity with load/store 1653 /// ops of other dialects. 
1654 Value getValue() { return getVector(); } 1655 1656 MutableOperandRange getDpsInitsMutable() { return getSourceMutable(); } 1657 }]; 1658 1659 let hasFolder = 1; 1660 let hasCanonicalizer = 1; 1661 let hasCustomAssemblyFormat = 1; 1662 let hasVerifier = 1; 1663} 1664 1665def Vector_LoadOp : Vector_Op<"load"> { 1666 let summary = "reads an n-D slice of memory into an n-D vector"; 1667 let description = [{ 1668 The 'vector.load' operation reads an n-D slice of memory into an n-D 1669 vector. It takes a 'base' memref, an index for each memref dimension and a 1670 result vector type as arguments. It returns a value of the result vector 1671 type. The 'base' memref and indices determine the start memory address from 1672 which to read. Each index provides an offset for each memref dimension 1673 based on the element type of the memref. The shape of the result vector 1674 type determines the shape of the slice read from the start memory address. 1675 The elements along each dimension of the slice are strided by the memref 1676 strides. When loading more than 1 element, only unit strides are allowed 1677 along the most minor memref dimension. These constraints guarantee that 1678 elements read along the first dimension of the slice are contiguous in 1679 memory. 1680 1681 The memref element type can be a scalar or a vector type. If the memref 1682 element type is a scalar, it should match the element type of the result 1683 vector. If the memref element type is vector, it should match the result 1684 vector type. 1685 1686 Example: 0-D vector load on a scalar memref. 1687 ```mlir 1688 %result = vector.load %base[%i, %j] : memref<100x100xf32>, vector<f32> 1689 ``` 1690 1691 Example: 1-D vector load on a scalar memref. 1692 ```mlir 1693 %result = vector.load %base[%i, %j] : memref<100x100xf32>, vector<8xf32> 1694 ``` 1695 1696 Example: 1-D vector load on a vector memref. 
1697 ```mlir 1698 %result = vector.load %memref[%i, %j] : memref<200x100xvector<8xf32>>, vector<8xf32> 1699 ``` 1700 1701 Example: 2-D vector load on a scalar memref. 1702 ```mlir 1703 %result = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<4x8xf32> 1704 ``` 1705 1706 Example: 2-D vector load on a vector memref. 1707 ```mlir 1708 %result = vector.load %memref[%i, %j] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32> 1709 ``` 1710 1711 Representation-wise, the 'vector.load' operation permits out-of-bounds 1712 reads. Support and implementation of out-of-bounds vector loads is 1713 target-specific. No assumptions should be made on the value of elements 1714 loaded out of bounds. Not all targets may support out-of-bounds vector 1715 loads. 1716 1717 Example: Potential out-of-bound vector load. 1718 ```mlir 1719 %result = vector.load %memref[%index] : memref<?xf32>, vector<8xf32> 1720 ``` 1721 1722 Example: Explicit out-of-bound vector load. 1723 ```mlir 1724 %result = vector.load %memref[%c0] : memref<7xf32>, vector<8xf32> 1725 ``` 1726 }]; 1727 1728 let arguments = (ins Arg<AnyMemRef, "the reference to load from", 1729 [MemRead]>:$base, 1730 Variadic<Index>:$indices, 1731 DefaultValuedOptionalAttr<BoolAttr, "false">:$nontemporal); 1732 let results = (outs AnyVectorOfAnyRank:$result); 1733 1734 let extraClassDeclaration = [{ 1735 MemRefType getMemRefType() { 1736 return ::llvm::cast<MemRefType>(getBase().getType()); 1737 } 1738 1739 VectorType getVectorType() { 1740 return ::llvm::cast<VectorType>(getResult().getType()); 1741 } 1742 }]; 1743 1744 let hasFolder = 1; 1745 let hasVerifier = 1; 1746 1747 let assemblyFormat = 1748 "$base `[` $indices `]` attr-dict `:` type($base) `,` type($result)"; 1749} 1750 1751def Vector_StoreOp : Vector_Op<"store"> { 1752 let summary = "writes an n-D vector to an n-D slice of memory"; 1753 let description = [{ 1754 The 'vector.store' operation writes an n-D vector to an n-D slice of memory. 
1755 It takes the vector value to be stored, a 'base' memref and an index for 1756 each memref dimension. The 'base' memref and indices determine the start 1757 memory address from which to write. Each index provides an offset for each 1758 memref dimension based on the element type of the memref. The shape of the 1759 vector value to store determines the shape of the slice written from the 1760 start memory address. The elements along each dimension of the slice are 1761 strided by the memref strides. When storing more than 1 element, only unit 1762 strides are allowed along the most minor memref dimension. These constraints 1763 guarantee that elements written along the first dimension of the slice are 1764 contiguous in memory. 1765 1766 The memref element type can be a scalar or a vector type. If the memref 1767 element type is a scalar, it should match the element type of the value 1768 to store. If the memref element type is vector, it should match the type 1769 of the value to store. 1770 1771 Example: 0-D vector store on a scalar memref. 1772 ```mlir 1773 vector.store %valueToStore, %memref[%i, %j] : memref<200x100xf32>, vector<f32> 1774 ``` 1775 1776 Example: 1-D vector store on a scalar memref. 1777 ```mlir 1778 vector.store %valueToStore, %memref[%i, %j] : memref<200x100xf32>, vector<8xf32> 1779 ``` 1780 1781 Example: 1-D vector store on a vector memref. 1782 ```mlir 1783 vector.store %valueToStore, %memref[%i, %j] : memref<200x100xvector<8xf32>>, vector<8xf32> 1784 ``` 1785 1786 Example: 2-D vector store on a scalar memref. 1787 ```mlir 1788 vector.store %valueToStore, %memref[%i, %j] : memref<200x100xf32>, vector<4x8xf32> 1789 ``` 1790 1791 Example: 2-D vector store on a vector memref. 1792 ```mlir 1793 vector.store %valueToStore, %memref[%i, %j] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32> 1794 ``` 1795 1796 Representation-wise, the 'vector.store' operation permits out-of-bounds 1797 writes. 
Support and implementation of out-of-bounds vector stores are 1798 target-specific. No assumptions should be made on the memory written out of 1799 bounds. Not all targets may support out-of-bounds vector stores. 1800 1801 Example: Potential out-of-bounds vector store. 1802 ```mlir 1803 vector.store %valueToStore, %memref[%index] : memref<?xf32>, vector<8xf32> 1804 ``` 1805 1806 Example: Explicit out-of-bounds vector store. 1807 ```mlir 1808 vector.store %valueToStore, %memref[%c0] : memref<7xf32>, vector<8xf32> 1809 ``` 1810 }]; 1811 1812 let arguments = (ins 1813 AnyVectorOfAnyRank:$valueToStore, 1814 Arg<AnyMemRef, "the reference to store to", 1815 [MemWrite]>:$base, 1816 Variadic<Index>:$indices, 1817 DefaultValuedOptionalAttr<BoolAttr, "false">:$nontemporal 1818 ); 1819 1820 let extraClassDeclaration = [{ 1821 MemRefType getMemRefType() { 1822 return ::llvm::cast<MemRefType>(getBase().getType()); 1823 } 1824 1825 VectorType getVectorType() { 1826 return ::llvm::cast<VectorType>(getValueToStore().getType()); 1827 } 1828 }]; 1829 1830 let hasFolder = 1; 1831 let hasVerifier = 1; 1832 1833 let assemblyFormat = "$valueToStore `,` $base `[` $indices `]` attr-dict " 1834 "`:` type($base) `,` type($valueToStore)"; 1835} 1836 1837def Vector_MaskedLoadOp : 1838 Vector_Op<"maskedload">, 1839 Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base, 1840 Variadic<Index>:$indices, 1841 VectorOfNonZeroRankOf<[I1]>:$mask, 1842 AnyVectorOfNonZeroRank:$pass_thru)>, 1843 Results<(outs AnyVectorOfNonZeroRank:$result)> { 1844 1845 let summary = "loads elements from memory into a vector as defined by a mask vector"; 1846 1847 let description = [{ 1848 The masked load reads elements from memory into a vector as defined 1849 by a base with indices and a mask vector. When the mask is set, the 1850 element is read from memory. Otherwise, the corresponding element is taken 1851 from a pass-through vector. 
Informally the semantics are: 1852 ``` 1853 result[0] := if mask[0] then base[i + 0] else pass_thru[0] 1854 result[1] := if mask[1] then base[i + 1] else pass_thru[1] 1855 etc. 1856 ``` 1857 1858 If a mask bit is set and the corresponding index is out-of-bounds for the 1859 given base, the behavior is undefined. If a mask bit is not set, the value 1860 comes from the pass-through vector regardless of the index, and the index is 1861 allowed to be out-of-bounds. 1862 1863 The masked load can be used directly where applicable, or can be used 1864 during progressive lowering to bring other memory operations closer to 1865 hardware ISA support for a masked load. The semantics of the operation 1866 closely correspond to those of the `llvm.masked.load` 1867 [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-load-intrinsics). 1868 1869 Examples: 1870 1871 ```mlir 1872 %0 = vector.maskedload %base[%i], %mask, %pass_thru 1873 : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32> 1874 1875 %1 = vector.maskedload %base[%i, %j], %mask, %pass_thru 1876 : memref<?x?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32> 1877 ``` 1878 }]; 1879 let extraClassDeclaration = [{ 1880 MemRefType getMemRefType() { 1881 return ::llvm::cast<MemRefType>(getBase().getType()); 1882 } 1883 VectorType getMaskVectorType() { 1884 return ::llvm::cast<VectorType>(getMask().getType()); 1885 } 1886 VectorType getPassThruVectorType() { 1887 return ::llvm::cast<VectorType>(getPassThru().getType()); 1888 } 1889 VectorType getVectorType() { 1890 return ::llvm::cast<VectorType>(getResult().getType()); 1891 } 1892 }]; 1893 let assemblyFormat = "$base `[` $indices `]` `,` $mask `,` $pass_thru attr-dict `:` " 1894 "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)"; 1895 let hasCanonicalizer = 1; 1896 let hasFolder = 1; 1897 let hasVerifier = 1; 1898} 1899 1900def Vector_MaskedStoreOp : 1901 Vector_Op<"maskedstore">, 1902 Arguments<(ins Arg<AnyMemRef, "", 
[MemWrite]>:$base, 1903 Variadic<Index>:$indices, 1904 VectorOfNonZeroRankOf<[I1]>:$mask, 1905 AnyVectorOfNonZeroRank:$valueToStore)> { 1906 1907 let summary = "stores elements from a vector into memory as defined by a mask vector"; 1908 1909 let description = [{ 1910 The masked store operation writes elements from a vector into memory 1911 as defined by a base with indices and a mask vector. When the mask is 1912 set, the corresponding element from the vector is written to memory. Otherwise, 1913 no action is taken for the element. Informally the semantics are: 1914 ``` 1915 if (mask[0]) base[i+0] = value[0] 1916 if (mask[1]) base[i+1] = value[1] 1917 etc. 1918 ``` 1919 1920 If a mask bit is set and the corresponding index is out-of-bounds for the 1921 given base, the behavior is undefined. If a mask bit is not set, no value 1922 is stored regardless of the index, and the index is allowed to be 1923 out-of-bounds. 1924 1925 The masked store can be used directly where applicable, or can be used 1926 during progressive lowering to bring other memory operations closer to 1927 hardware ISA support for a masked store. The semantics of the operation 1928 closely correspond to those of the `llvm.masked.store` 1929 [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-store-intrinsics). 
1930 1931 Examples: 1932 1933 ```mlir 1934 vector.maskedstore %base[%i], %mask, %value 1935 : memref<?xf32>, vector<8xi1>, vector<8xf32> 1936 1937 vector.maskedstore %base[%i, %j], %mask, %value 1938 : memref<?x?xf32>, vector<16xi1>, vector<16xf32> 1939 ``` 1940 }]; 1941 let extraClassDeclaration = [{ 1942 MemRefType getMemRefType() { 1943 return ::llvm::cast<MemRefType>(getBase().getType()); 1944 } 1945 VectorType getMaskVectorType() { 1946 return ::llvm::cast<VectorType>(getMask().getType()); 1947 } 1948 VectorType getVectorType() { 1949 return ::llvm::cast<VectorType>(getValueToStore().getType()); 1950 } 1951 }]; 1952 let assemblyFormat = 1953 "$base `[` $indices `]` `,` $mask `,` $valueToStore " 1954 "attr-dict `:` type($base) `,` type($mask) `,` type($valueToStore)"; 1955 let hasCanonicalizer = 1; 1956 let hasFolder = 1; 1957 let hasVerifier = 1; 1958} 1959 1960def Vector_GatherOp : 1961 Vector_Op<"gather", [ 1962 DeclareOpInterfaceMethods<MaskableOpInterface>, 1963 DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]> 1964 ]>, 1965 Arguments<(ins Arg<AnyShaped, "", [MemRead]>:$base, 1966 Variadic<Index>:$indices, 1967 VectorOfNonZeroRankOf<[AnyInteger, Index]>:$index_vec, 1968 VectorOfNonZeroRankOf<[I1]>:$mask, 1969 AnyVectorOfNonZeroRank:$pass_thru)>, 1970 Results<(outs AnyVectorOfNonZeroRank:$result)> { 1971 1972 let summary = [{ 1973 gathers elements from memory or ranked tensor into a vector as defined by an 1974 index vector and a mask vector 1975 }]; 1976 1977 let description = [{ 1978 The gather operation returns an n-D vector whose elements are either loaded 1979 from memory or ranked tensor, or taken from a pass-through vector, depending 1980 on the values of an n-D mask vector. 1981 If a mask bit is set, the corresponding result element is defined by the base 1982 with indices and the n-D index vector (each index is a 1-D offset on the base). 1983 Otherwise, the corresponding element is taken from the n-D pass-through vector. 
 1984 Informally the semantics are: 1985 ``` 1986 result[0] := if mask[0] then base[index[0]] else pass_thru[0] 1987 result[1] := if mask[1] then base[index[1]] else pass_thru[1] 1988 etc. 1989 ``` 1990 1991 If a mask bit is set and the corresponding index is out-of-bounds for the 1992 given base, the behavior is undefined. If a mask bit is not set, the value 1993 comes from the pass-through vector regardless of the index, and the index is 1994 allowed to be out-of-bounds. 1995 1996 The gather operation can be used directly where applicable, or can be used 1997 during progressive lowering to bring other memory operations closer to 1998 hardware ISA support for a gather. 1999 2000 Examples: 2001 2002 ```mlir 2003 %0 = vector.gather %base[%c0][%v], %mask, %pass_thru 2004 : memref<?xf32>, vector<2x16xi32>, vector<2x16xi1>, vector<2x16xf32> into vector<2x16xf32> 2005 2006 %1 = vector.gather %base[%i, %j][%v], %mask, %pass_thru 2007 : memref<16x16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32> 2008 ``` 2009 }]; 2010 2011 let extraClassDeclaration = [{ 2012 ShapedType getBaseType() { return getBase().getType(); } 2013 VectorType getIndexVectorType() { return getIndexVec().getType(); } 2014 VectorType getMaskVectorType() { return getMask().getType(); } 2015 VectorType getPassThruVectorType() { return getPassThru().getType(); } 2016 VectorType getVectorType() { return getResult().getType(); } 2017 }]; 2018 2019 let assemblyFormat = 2020 "$base `[` $indices `]` `[` $index_vec `]` `,` " 2021 "$mask `,` $pass_thru attr-dict `:` type($base) `,` " 2022 "type($index_vec) `,` type($mask) `,` type($pass_thru) " 2023 "`into` type($result)"; 2024 let hasCanonicalizer = 1; 2025 let hasVerifier = 1; 2026} 2027 2028def Vector_ScatterOp : 2029 Vector_Op<"scatter">, 2030 Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base, 2031 Variadic<Index>:$indices, 2032 VectorOfRankAndType<[1], [AnyInteger, Index]>:$index_vec, 2033 VectorOfRankAndType<[1], [I1]>:$mask, 2034 
VectorOfRank<[1]>:$valueToStore)> { 2035 2036 let summary = [{ 2037 scatters elements from a vector into memory as defined by an index vector 2038 and a mask vector 2039 }]; 2040 2041 let description = [{ 2042 The scatter operation stores elements from a 1-D vector into memory as 2043 defined by a base with indices and an additional 1-D index vector, but 2044 only if the corresponding bit in a 1-D mask vector is set. Otherwise, no 2045 action is taken for that element. Informally the semantics are: 2046 ``` 2047 if (mask[0]) base[index[0]] = value[0] 2048 if (mask[1]) base[index[1]] = value[1] 2049 etc. 2050 ``` 2051 2052 If a mask bit is set and the corresponding index is out-of-bounds for the 2053 given base, the behavior is undefined. If a mask bit is not set, no value 2054 is stored regardless of the index, and the index is allowed to be 2055 out-of-bounds. 2056 2057 If the index vector contains two or more duplicate indices, the behavior is 2058 undefined. Underlying implementation may enforce strict sequential 2059 semantics. 2060 TODO: always enforce strict sequential semantics? 2061 2062 The scatter operation can be used directly where applicable, or can be used 2063 during progressive lowering to bring other memory operations closer to 2064 hardware ISA support for a scatter. The semantics of the operation closely 2065 correspond to those of the `llvm.masked.scatter` 2066 [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-scatter-intrinsics). 
2067 2068 Examples: 2069 2070 ```mlir 2071 vector.scatter %base[%c0][%v], %mask, %value 2072 : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> 2073 2074 vector.scatter %base[%i, %j][%v], %mask, %value 2075 : memref<16x16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> 2076 ``` 2077 }]; 2078 2079 let extraClassDeclaration = [{ 2080 MemRefType getMemRefType() { return getBase().getType(); } 2081 VectorType getIndexVectorType() { return getIndexVec().getType(); } 2082 VectorType getMaskVectorType() { return getMask().getType(); } 2083 VectorType getVectorType() { return getValueToStore().getType(); } 2084 }]; 2085 2086 let assemblyFormat = 2087 "$base `[` $indices `]` `[` $index_vec `]` `,` " 2088 "$mask `,` $valueToStore attr-dict `:` type($base) `,` " 2089 "type($index_vec) `,` type($mask) `,` type($valueToStore)"; 2090 let hasCanonicalizer = 1; 2091 let hasVerifier = 1; 2092} 2093 2094def Vector_ExpandLoadOp : 2095 Vector_Op<"expandload">, 2096 Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base, 2097 Variadic<Index>:$indices, 2098 FixedVectorOfNonZeroRankOf<[I1]>:$mask, 2099 AnyVectorOfNonZeroRank:$pass_thru)>, 2100 Results<(outs AnyVectorOfNonZeroRank:$result)> { 2101 2102 let summary = "reads elements from memory and spreads them into a vector as defined by a mask"; 2103 2104 let description = [{ 2105 The expand load reads elements from memory into a vector as defined by a 2106 base with indices and a mask vector. Expansion only applies to the innermost 2107 dimension. When the mask is set, the next element is read from memory. 2108 Otherwise, the corresponding element is taken from a pass-through vector. 2109 Informally the semantics are: 2110 2111 ``` 2112 index = i 2113 result[0] := if mask[0] then base[index++] else pass_thru[0] 2114 result[1] := if mask[1] then base[index++] else pass_thru[1] 2115 etc. 2116 ``` 2117 2118 Note that the index increment is done conditionally. 
 2119 2120 If a mask bit is set and the corresponding index is out-of-bounds for the 2121 given base, the behavior is undefined. If a mask bit is not set, the value 2122 comes from the pass-through vector regardless of the index, and the index is 2123 allowed to be out-of-bounds. 2124 2125 The expand load can be used directly where applicable, or can be used 2126 during progressive lowering to bring other memory operations closer to 2127 hardware ISA support for an expand. The semantics of the operation closely 2128 correspond to those of the `llvm.masked.expandload` 2129 [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-expandload-intrinsics). 2130 2131 Note, at the moment this Op is only available for fixed-width vectors. 2132 2133 Examples: 2134 2135 ```mlir 2136 %0 = vector.expandload %base[%i], %mask, %pass_thru 2137 : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32> 2138 2139 %1 = vector.expandload %base[%i, %j], %mask, %pass_thru 2140 : memref<?x?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32> 2141 ``` 2142 }]; 2143 let extraClassDeclaration = [{ 2144 MemRefType getMemRefType() { 2145 return ::llvm::cast<MemRefType>(getBase().getType()); 2146 } 2147 VectorType getMaskVectorType() { 2148 return ::llvm::cast<VectorType>(getMask().getType()); 2149 } 2150 VectorType getPassThruVectorType() { 2151 return ::llvm::cast<VectorType>(getPassThru().getType()); 2152 } 2153 VectorType getVectorType() { 2154 return ::llvm::cast<VectorType>(getResult().getType()); 2155 } 2156 }]; 2157 let assemblyFormat = "$base `[` $indices `]` `,` $mask `,` $pass_thru attr-dict `:` " 2158 "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)"; 2159 let hasCanonicalizer = 1; 2160 let hasVerifier = 1; 2161} 2162 2163def Vector_CompressStoreOp : 2164 Vector_Op<"compressstore">, 2165 Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base, 2166 Variadic<Index>:$indices, 2167 FixedVectorOfNonZeroRankOf<[I1]>:$mask, 2168 
AnyVectorOfNonZeroRank:$valueToStore)> { 2169 2170 let summary = "writes elements selectively from a vector as defined by a mask"; 2171 2172 let description = [{ 2173 The compress store operation writes elements from a vector into memory as 2174 defined by a base with indices and a mask vector. Compression only applies 2175 to the innermost dimension. When the mask is set, the corresponding element 2176 from the vector is written next to memory. Otherwise, no action is taken 2177 for the element. Informally the semantics are: 2178 2179 ``` 2180 index = i 2181 if (mask[0]) base[index++] = value[0] 2182 if (mask[1]) base[index++] = value[1] 2183 etc. 2184 ``` 2185 2186 Note that the index increment is done conditionally. 2187 2188 If a mask bit is set and the corresponding index is out-of-bounds for the 2189 given base, the behavior is undefined. If a mask bit is not set, no value 2190 is stored regardless of the index, and the index is allowed to be 2191 out-of-bounds. 2192 2193 The compress store can be used directly where applicable, or can be used 2194 during progressive lowering to bring other memory operations closer to 2195 hardware ISA support for a compress. The semantics of the operation closely 2196 correspond to those of the `llvm.masked.compressstore` 2197 [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-compressstore-intrinsics). 2198 2199 Note, at the moment this Op is only available for fixed-width vectors. 
2200 2201 Examples: 2202 2203 ```mlir 2204 vector.compressstore %base[%i], %mask, %value 2205 : memref<?xf32>, vector<8xi1>, vector<8xf32> 2206 2207 vector.compressstore %base[%i, %j], %mask, %value 2208 : memref<?x?xf32>, vector<16xi1>, vector<16xf32> 2209 ``` 2210 }]; 2211 let extraClassDeclaration = [{ 2212 MemRefType getMemRefType() { 2213 return ::llvm::cast<MemRefType>(getBase().getType()); 2214 } 2215 VectorType getMaskVectorType() { 2216 return ::llvm::cast<VectorType>(getMask().getType()); 2217 } 2218 VectorType getVectorType() { 2219 return ::llvm::cast<VectorType>(getValueToStore().getType()); 2220 } 2221 }]; 2222 let assemblyFormat = 2223 "$base `[` $indices `]` `,` $mask `,` $valueToStore attr-dict `:` " 2224 "type($base) `,` type($mask) `,` type($valueToStore)"; 2225 let hasCanonicalizer = 1; 2226 let hasVerifier = 1; 2227} 2228 2229def Vector_ShapeCastOp : 2230 Vector_Op<"shape_cast", [Pure, 2231 DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]> 2232 ]>, 2233 Arguments<(ins AnyVectorOfAnyRank:$source)>, 2234 Results<(outs AnyVectorOfAnyRank:$result)> { 2235 let summary = "shape_cast casts between vector shapes"; 2236 let description = [{ 2237 The shape_cast operation casts between an n-D source vector shape and 2238 a k-D result vector shape (the element type remains the same). 2239 2240 If reducing rank (n > k), result dimension sizes must be a product 2241 of contiguous source dimension sizes. 2242 If expanding rank (n < k), source dimensions must factor into a 2243 contiguous sequence of destination dimension sizes. 2244 Each source dim is expanded (or contiguous sequence of source dims combined) 2245 in source dimension list order (i.e. 0 <= i < n), to produce a contiguous 2246 sequence of result dims (or a single result dim), in result dimension list 2247 order (i.e. 0 <= j < k). The product of all source dimension sizes and all 2248 result dimension sizes must match. 
2249 2250 It is currently assumed that this operation does not require moving data, 2251 and that it will be folded away before lowering vector operations. 2252 2253 There is an exception to the folding expectation when targeting 2254 llvm.intr.matrix operations. We need a type conversion back and forth from a 2255 2-D MLIR vector to a 1-D flattened LLVM vector.shape_cast lowering to LLVM 2256 is supported in that particular case, for now. 2257 2258 Example: 2259 2260 ```mlir 2261 // Example casting to a lower vector rank. 2262 %1 = vector.shape_cast %0 : vector<5x1x4x3xf32> to vector<20x3xf32> 2263 2264 // Example casting to a higher vector rank. 2265 %3 = vector.shape_cast %2 : vector<10x12x8xf32> to vector<5x2x3x4x8xf32> 2266 2267 ``` 2268 }]; 2269 let extraClassDeclaration = [{ 2270 VectorType getSourceVectorType() { 2271 return ::llvm::cast<VectorType>(getSource().getType()); 2272 } 2273 VectorType getResultVectorType() { 2274 return ::llvm::cast<VectorType>(getResult().getType()); 2275 } 2276 }]; 2277 let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)"; 2278 let hasFolder = 1; 2279 let hasCanonicalizer = 1; 2280 let hasVerifier = 1; 2281} 2282 2283def Vector_BitCastOp : 2284 Vector_Op<"bitcast", [Pure, AllRanksMatch<["source", "result"]>]>, 2285 Arguments<(ins AnyVectorOfAnyRank:$source)>, 2286 Results<(outs AnyVectorOfAnyRank:$result)>{ 2287 let summary = "bitcast casts between vectors"; 2288 let description = [{ 2289 The bitcast operation casts between vectors of the same rank, the minor 1-D 2290 vector size is casted to a vector with a different element type but same 2291 bitwidth. In case of 0-D vectors, the bitwidth of element types must be 2292 equal. 2293 2294 Example: 2295 2296 ```mlir 2297 // Example casting to a smaller element type. 2298 %1 = vector.bitcast %0 : vector<5x1x4x3xf32> to vector<5x1x4x6xi16> 2299 2300 // Example casting to a bigger element type. 
 2301 %3 = vector.bitcast %2 : vector<10x12x8xi8> to vector<10x12x2xi32> 2302 2303 // Example casting to an element type of the same size. 2304 %5 = vector.bitcast %4 : vector<5x1x4x3xf32> to vector<5x1x4x3xi32> 2305 2306 // Example casting of 0-D vectors. 2307 %7 = vector.bitcast %6 : vector<f32> to vector<i32> 2308 ``` 2309 }]; 2310 let extraClassDeclaration = [{ 2311 VectorType getSourceVectorType() { 2312 return ::llvm::cast<VectorType>(getSource().getType()); 2313 } 2314 VectorType getResultVectorType() { 2315 return ::llvm::cast<VectorType>(getResult().getType()); 2316 } 2317 }]; 2318 let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)"; 2319 let hasFolder = 1; 2320 let hasVerifier = 1; 2321} 2322 2323def Vector_TypeCastOp : 2324 Vector_Op<"type_cast", [Pure, ViewLikeOpInterface]>, 2325 Arguments<(ins StaticShapeMemRefOf<[AnyType]>:$memref)>, 2326 Results<(outs AnyMemRef:$result)> { 2327 let summary = "type_cast op converts a scalar memref to a vector memref"; 2328 let description = [{ 2329 Performs a conversion from a memref with scalar element to a memref with a 2330 *single* vector element, copying the shape of the memref to the vector. This 2331 is the minimal viable operation that is required to make 2332 super-vectorization operational. It can be seen as a special case of the 2333 `view` operation but scoped in the super-vectorization context. 2334 2335 Example: 2336 2337 ```mlir 2338 %A = memref.alloc() : memref<5x4x3xf32> 2339 %VA = vector.type_cast %A : memref<5x4x3xf32> to memref<vector<5x4x3xf32>> 2340 ``` 2341 }]; 2342 2343 /// Build the canonical memRefType with a single vector. 2344 /// E.g. memref<4 x 5 x vector<6 x f32>> -> memref<vector<4 x 5 x 6 x f32>>. 
2345 let builders = [OpBuilder<(ins "Value":$source)>]; 2346 2347 let extraClassDeclaration = [{ 2348 MemRefType getMemRefType() { 2349 return ::llvm::cast<MemRefType>(getMemref().getType()); 2350 } 2351 MemRefType getResultMemRefType() { 2352 return ::llvm::cast<MemRefType>(getResult().getType()); 2353 } 2354 // Implement ViewLikeOpInterface. 2355 Value getViewSource() { return getMemref(); } 2356 }]; 2357 2358 let assemblyFormat = [{ 2359 $memref attr-dict `:` type($memref) `to` type($result) 2360 }]; 2361 let hasVerifier = 1; 2362} 2363 2364def Vector_ConstantMaskOp : 2365 Vector_Op<"constant_mask", [Pure]>, 2366 Arguments<(ins DenseI64ArrayAttr:$mask_dim_sizes)>, 2367 Results<(outs VectorOfAnyRankOf<[I1]>)> { 2368 let summary = "creates a constant vector mask"; 2369 let description = [{ 2370 Creates and returns a vector mask where elements of the result vector 2371 are set to '0' or '1', based on whether the element indices are contained 2372 within a hyper-rectangular region specified by the 'mask_dim_sizes' 2373 array attribute argument. Each element of the 'mask_dim_sizes' array, 2374 specifies an exclusive upper bound [0, mask-dim-size-element-value) 2375 for a unique dimension in the vector result. The conjunction of the ranges 2376 define a hyper-rectangular region within which elements values are set to 1 2377 (otherwise element values are set to 0). Each value of 'mask_dim_sizes' must 2378 be non-negative and not greater than the size of the corresponding vector 2379 dimension (as opposed to vector.create_mask which allows this). Sizes that 2380 correspond to scalable dimensions are implicitly multiplied by vscale, 2381 though currently only zero (none set) or the size of the dim/vscale 2382 (all set) are supported. 2383 2384 Example: 2385 2386 ```mlir 2387 // create a constant vector mask of size 4x3xi1 with elements in range 2388 // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0). 
2389 %1 = vector.constant_mask [3, 2] : vector<4x3xi1> 2390 2391 print %1 2392 columns 2393 0 1 2 2394 |------------ 2395 0 | 1 1 0 2396 rows 1 | 1 1 0 2397 2 | 1 1 0 2398 3 | 0 0 0 2399 ``` 2400 }]; 2401 2402 let builders = [ 2403 // Build with mixed static/dynamic operands. 2404 OpBuilder<(ins "VectorType":$type, "ConstantMaskKind":$kind)> 2405 ]; 2406 2407 let extraClassDeclaration = [{ 2408 /// Return the result type of this op. 2409 VectorType getVectorType() { 2410 return cast<VectorType>(getOperation()->getResultTypes()[0]); 2411 } 2412 2413 /// Return whether the mask is a uniform vector of `1`s. 2414 bool isAllOnesMask(); 2415 }]; 2416 2417 let assemblyFormat = "$mask_dim_sizes attr-dict `:` type(results)"; 2418 let hasVerifier = 1; 2419} 2420 2421def Vector_CreateMaskOp : 2422 Vector_Op<"create_mask", [Pure]>, 2423 Arguments<(ins Variadic<Index>:$operands)>, 2424 Results<(outs VectorOfAnyRankOf<[I1]>)> { 2425 let summary = "creates a vector mask"; 2426 let description = [{ 2427 Creates and returns a vector mask where elements of the result vector 2428 are set to '0' or '1', based on whether the element indices are contained 2429 within a hyper-rectangular region specified by the operands. Specifically, 2430 each operand specifies a range [0, operand-value) for a unique dimension in 2431 the vector result. The conjunction of the operand ranges define a 2432 hyper-rectangular region within which elements values are set to 1 2433 (otherwise element values are set to 0). If operand-value is negative, it is 2434 treated as if it were zero, and if it is greater than the corresponding 2435 dimension size, it is treated as if it were equal to the dimension size. 2436 2437 Example: 2438 2439 ```mlir 2440 // create a vector mask of size 4x3xi1 where elements in range 2441 // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0). 
 2442 %1 = vector.create_mask %c3, %c2 : vector<4x3xi1> 2443 2444 print %1 2445 columns 2446 0 1 2 2447 |------------ 2448 0 | 1 1 0 2449 rows 1 | 1 1 0 2450 2 | 1 1 0 2451 3 | 0 0 0 2452 ``` 2453 }]; 2454 2455 let builders = [ 2456 // Build with mixed static/dynamic operands. 2457 OpBuilder<(ins "VectorType":$type, "ArrayRef<OpFoldResult>":$mixedOperands)> 2458 ]; 2459 2460 let extraClassDeclaration = [{ 2461 /// Return the result type of this op. 2462 VectorType getVectorType() { 2463 return cast<VectorType>(getOperation()->getResultTypes()[0]); 2464 } 2465 }]; 2466 2467 let hasCanonicalizer = 1; 2468 let hasVerifier = 1; 2469 let assemblyFormat = "$operands attr-dict `:` type(results)"; 2470} 2471 2472def Vector_MaskOp : Vector_Op<"mask", [ 2473 SingleBlockImplicitTerminator<"vector::YieldOp">, 2474 DeclareOpInterfaceMethods<MaskingOpInterface>, 2475 RecursiveMemoryEffects, NoRegionArguments 2476]> { 2477 let summary = "Predicates a maskable vector operation"; 2478 let description = [{ 2479 The `vector.mask` is a `MaskingOpInterface` operation that predicates the 2480 execution of another operation. It takes an `i1` vector mask and an 2481 optional passthru vector as arguments. 2482 2483 An implicitly `vector.yield`-terminated region encloses the operation to be 2484 masked. Values used within the region are captured from above. Only one 2485 *maskable* operation can be masked with a `vector.mask` operation at a time. 2486 An operation is *maskable* if it implements the `MaskableOpInterface`. The 2487 terminator yields all results of the maskable operation to the result of 2488 this operation. 2489 2490 The vector mask argument holds a bit for each vector lane and determines 2491 which vector lanes should execute the maskable operation and which ones 2492 should not. The `vector.mask` operation returns the value produced by the 2493 masked execution of the nested operation, if any.
The masked-off lanes in 2494 the result vector are taken from the corresponding lanes of the pass-thru 2495 argument, if provided, or left unmodified, otherwise. At this point, 0-D 2496 vectors are not supported by `vector.mask`. They may be supported in the 2497 future. 2498 2499 The `vector.mask` operation does not prescribe how a maskable operation 2500 should be masked or how a masked operation should be lowered. Masking 2501 constraints and some semantic details are provided by each maskable 2502 operation through the `MaskableOpInterface`. Lowering of masked operations 2503 is implementation defined. For instance, scalarizing the masked operation 2504 or executing the operation for the masked-off lanes are valid lowerings as 2505 long as the execution of masked-off lanes does not change the observable 2506 behavior of the program. 2507 2508 Examples: 2509 2510 ``` 2511 %0 = vector.mask %mask { vector.reduction <add>, %a : vector<8xi32> into i32 } : vector<8xi1> -> i32 2512 ``` 2513 2514 ``` 2515 %0 = vector.mask %mask, %passthru { arith.divsi %a, %b : vector<8xi32> } : vector<8xi1> -> vector<8xi32> 2516 ``` 2517 2518 ``` 2519 vector.mask %mask { vector.transfer_write %val, %t0[%idx] : vector<16xf32>, memref<?xf32> } : vector<16xi1> 2520 ``` 2521 2522 ``` 2523 vector.mask %mask { vector.transfer_write %val, %t0[%idx] : vector<16xf32>, tensor<?xf32> } : vector<16xi1> -> tensor<?xf32> 2524 ``` 2525 }]; 2526 2527 // TODO: Support multiple passthru values. 
2528 let arguments = (ins VectorOfNonZeroRankOf<[I1]>:$mask, 2529 Optional<AnyType>:$passthru); 2530 let results = (outs Variadic<AnyType>:$results); 2531 let regions = (region SizedRegion<1>:$maskRegion); 2532 2533 let skipDefaultBuilders = 1; 2534 let builders = [ 2535 OpBuilder<(ins "Value":$mask, "Operation *":$maskableOp, 2536 CArg<"function_ref<void(OpBuilder &, Operation *)>">:$maskRegion)>, 2537 OpBuilder<(ins "TypeRange":$resultTypes, "Value":$mask, "Operation *":$maskableOp, 2538 CArg<"function_ref<void(OpBuilder &, Operation *)>">:$maskRegion)>, 2539 OpBuilder<(ins "TypeRange":$resultTypes, "Value":$mask, "Value":$passthru, 2540 "Operation *":$maskableOp, 2541 CArg<"function_ref<void(OpBuilder &, Operation *)>">:$maskRegion)> 2542 ]; 2543 2544 let extraClassDeclaration = [{ 2545 Block *getMaskBlock() { return &getMaskRegion().front(); } 2546 2547 /// Returns true if mask op is not masking any operation. 2548 bool isEmpty() { 2549 Block *block = getMaskBlock(); 2550 if (block->getOperations().size() > 1) 2551 return false; 2552 return true; 2553 } 2554 2555 static void ensureTerminator(Region ®ion, Builder &builder, 2556 Location loc); 2557 }]; 2558 2559 let hasCanonicalizer = 1; 2560 let hasFolder = 1; 2561 let hasCustomAssemblyFormat = 1; 2562 let hasVerifier = 1; 2563} 2564 2565def Vector_TransposeOp : 2566 Vector_Op<"transpose", [Pure, 2567 DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>, 2568 PredOpTrait<"operand and result have same element type", 2569 TCresVTEtIsSameAsOpBase<0, 0>>]> { 2570 let summary = "vector transpose operation"; 2571 let description = [{ 2572 Takes a n-D vector and returns the transposed n-D vector defined by 2573 the permutation of ranks in the n-sized integer array attribute (in case 2574 of 0-D vectors the array attribute must be empty). 2575 2576 In the operation 2577 2578 ```mlir 2579 %1 = vector.transpose %0, [i_1, .., i_n] 2580 : vector<d_1 x .. x d_n x f32> 2581 to vector<d_trans[0] x .. 
x d_trans[n-1] x f32> 2582 ``` 2583 2584 the `permutation` array [i_1, .., i_n] must be a permutation of [0, .., n-1]. 2585 2586 Example: 2587 2588 ```mlir 2589 %1 = vector.transpose %0, [1, 0] : vector<2x3xf32> to vector<3x2xf32> 2590 2591 [ [a, b, c], [ [a, d], 2592 [d, e, f] ] -> [b, e], 2593 [c, f] ] 2594 ``` 2595 }]; 2596 2597 let arguments = (ins AnyVectorOfAnyRank:$vector, 2598 DenseI64ArrayAttr:$permutation); 2599 let results = (outs AnyVectorOfAnyRank:$result); 2600 2601 let builders = [ 2602 OpBuilder<(ins "Value":$vector, "ArrayRef<int64_t>":$permutation)> 2603 ]; 2604 let extraClassDeclaration = [{ 2605 VectorType getSourceVectorType() { 2606 return ::llvm::cast<VectorType>(getVector().getType()); 2607 } 2608 VectorType getResultVectorType() { 2609 return ::llvm::cast<VectorType>(getResult().getType()); 2610 } 2611 }]; 2612 let assemblyFormat = [{ 2613 $vector `,` $permutation attr-dict `:` type($vector) `to` type($result) 2614 }]; 2615 let hasCanonicalizer = 1; 2616 let hasFolder = 1; 2617 let hasVerifier = 1; 2618} 2619 2620def Vector_PrintOp : 2621 Vector_Op<"print", [ 2622 MemoryEffects<[MemWrite]>, 2623 PredOpTrait< 2624 "`source` or `punctuation` are not set when printing strings", 2625 CPred<"!getStringLiteral() || (!getSource() && getPunctuation() == PrintPunctuation::NewLine)"> 2626 >, 2627 ]>, 2628 Arguments<(ins Optional<Type<Or<[ 2629 AnyVectorOfAnyRank.predicate, 2630 AnyInteger.predicate, Index.predicate, AnyFloat.predicate 2631 ]>>>:$source, DefaultValuedAttr<Vector_PrintPunctuation, 2632 "::mlir::vector::PrintPunctuation::NewLine">:$punctuation, 2633 OptionalAttr<Builtin_StringAttr>:$stringLiteral) 2634 > { 2635 let summary = "print operation (for testing and debugging)"; 2636 let description = [{ 2637 Prints the source vector (or scalar) to stdout in a human-readable format 2638 (for testing and debugging). No return value. 
2639 2640 Example: 2641 2642 ```mlir 2643 %v = arith.constant dense<0.0> : vector<4xf32> 2644 vector.print %v : vector<4xf32> 2645 ``` 2646 2647 When lowered to LLVM, the vector print is decomposed into elementary 2648 printing method calls that at runtime will yield: 2649 2650 ``` 2651 ( 0.0, 0.0, 0.0, 0.0 ) 2652 ``` 2653 2654 This is printed to stdout via a small runtime support library, which only 2655 needs to provide a few printing methods (single value for all data 2656 types, opening/closing bracket, comma, newline). 2657 2658 By default `vector.print` adds a newline after the vector, but this can be 2659 controlled by the `punctuation` attribute. For example, to print a comma 2660 after instead do: 2661 2662 ```mlir 2663 vector.print %v : vector<4xf32> punctuation <comma> 2664 ``` 2665 2666 Note that it is possible to use the punctuation attribute alone. The 2667 following will print a single newline: 2668 2669 ```mlir 2670 vector.print punctuation <newline> 2671 ``` 2672 2673 Additionally, to aid with debugging and testing `vector.print` can also 2674 print constant strings: 2675 2676 ```mlir 2677 vector.print str "Hello, World!" 2678 ``` 2679 }]; 2680 let extraClassDeclaration = [{ 2681 Type getPrintType() { 2682 return getSource().getType(); 2683 } 2684 }]; 2685 let builders = [ 2686 OpBuilder<(ins "PrintPunctuation":$punctuation), [{ 2687 build($_builder, $_state, {}, punctuation, {}); 2688 }]>, 2689 OpBuilder<(ins "::mlir::Value":$source), [{ 2690 build($_builder, $_state, source, PrintPunctuation::NewLine); 2691 }]>, 2692 OpBuilder<(ins "::mlir::Value":$source, "PrintPunctuation":$punctuation), [{ 2693 build($_builder, $_state, source, punctuation, {}); 2694 }]>, 2695 OpBuilder<(ins "::llvm::StringRef":$string), [{ 2696 build($_builder, $_state, {}, PrintPunctuation::NewLine, $_builder.getStringAttr(string)); 2697 }]>, 2698 ]; 2699 2700 let assemblyFormat = [{ 2701 ($source^ `:` type($source))? 
2702 oilist( 2703 `str` $stringLiteral 2704 | `punctuation` $punctuation) 2705 attr-dict 2706 }]; 2707} 2708 2709//===----------------------------------------------------------------------===// 2710// Ops used for supporting progressive lowering and conversion type changes. 2711// The Ops are typically not used directly by higher level dialects, but are 2712// used by intra-dialect rewriting rules to bring vector operations closer 2713// to the hardware ISA. 2714//===----------------------------------------------------------------------===// 2715 2716/// Vector dialect matrix multiplication op that operates on flattened 1-D 2717/// MLIR vectors. This is the counterpart of llvm.matrix.multiply in MLIR. 2718/// This may seem redundant with vector.contract but it serves the purposes of 2719/// more progressive lowering and localized type conversion on the path: 2720/// `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`. 2721def Vector_MatmulOp : Vector_Op<"matrix_multiply", [Pure, 2722 PredOpTrait<"lhs operand and result have same element type", 2723 TCresVTEtIsSameAsOpBase<0, 0>>, 2724 PredOpTrait<"rhs operand and result have same element type", 2725 TCresVTEtIsSameAsOpBase<0, 1>>]>, 2726 Arguments<( 2727 // TODO: tighten vector element types that make sense. 2728 ins FixedVectorOfRankAndType<[1], 2729 [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$lhs, 2730 FixedVectorOfRankAndType<[1], 2731 [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$rhs, 2732 I32Attr:$lhs_rows, I32Attr:$lhs_columns, I32Attr:$rhs_columns)>, 2733 Results<( 2734 outs FixedVectorOfRankAndType<[1], 2735 [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$res)> 2736{ 2737 let summary = "Vector matrix multiplication op that operates on flattened 1-D" 2738 " MLIR vectors"; 2739 let description = [{ 2740 This is the counterpart of llvm.matrix.multiply in MLIR. It serves the 2741 purposes of more progressive lowering and localized type conversion. 
2742 Higher levels typically lower matrix multiplications into 'vector.contract' 2743 operations. Subsequent rewriting rule progressively lower these operations 2744 into 'vector.matrix_multiply' operations to bring the operations closer 2745 to the hardware ISA. 2746 2747 The ‘vector.matrix_multiply’ op treats `lhs` as matrix with <lhs_rows> rows 2748 and <lhs_columns> columns, `rhs` as matrix with <lhs_columns> rows and 2749 <rhs_columns> and multiplies them. The result matrix is returned embedded in 2750 the result vector. 2751 2752 Note, the corresponding LLVM intrinsic, `@llvm.matrix.multiply.*`, does not 2753 support scalable vectors. Hence, this Op is only available for fixed-width 2754 vectors. Also see: 2755 2756 http://llvm.org/docs/LangRef.html#llvm-matrix-multiply-intrinsic 2757 2758 Example: 2759 2760 ```mlir 2761 %C = vector.matrix_multiply %A, %B 2762 { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32 } : 2763 (vector<64xf64>, vector<48xf64>) -> vector<12xf64> 2764 ``` 2765 }]; 2766 let builders = [ 2767 OpBuilder<(ins "Value":$lhs, "Value":$rhs, "unsigned":$lhsRows, 2768 "unsigned":$lhsColumns, "unsigned":$rhsColumns), 2769 [{ 2770 $_state.addOperands({lhs, rhs}); 2771 $_state.addAttribute("lhs_rows",$_builder.getI32IntegerAttr(lhsRows)); 2772 $_state.addAttribute("lhs_columns",$_builder.getI32IntegerAttr(lhsColumns)); 2773 $_state.addAttribute("rhs_columns",$_builder.getI32IntegerAttr(rhsColumns)); 2774 $_state.addTypes(VectorType::get(lhsRows * rhsColumns, 2775 ::llvm::cast<VectorType>(lhs.getType()).getElementType())); 2776 }]>, 2777 ]; 2778 let assemblyFormat = "$lhs `,` $rhs attr-dict " 2779 "`:` `(` type($lhs) `,` type($rhs) `)` `->` type($res)"; 2780} 2781 2782/// Vector dialect matrix transposition op that operates on flattened 1-D 2783/// MLIR vectors. This is the counterpart of llvm.matrix.transpose in MLIR. 
/// This may seem redundant with vector.transpose but it serves the purposes of
/// more progressive lowering and localized type conversion on the path:
/// `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`.
def Vector_FlatTransposeOp : Vector_Op<"flat_transpose", [Pure,
  PredOpTrait<"source operand and result have same element type",
              TCresVTEtIsSameAsOpBase<0, 0>>]>,
    Arguments<(
      // TODO: tighten vector element types that make sense.
      ins FixedVectorOfRankAndType<[1],
            [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$matrix,
          I32Attr:$rows, I32Attr:$columns)>,
    Results<(
      outs FixedVectorOfRankAndType<[1],
             [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$res)> {
  let summary = "Vector matrix transposition on flattened 1-D MLIR vectors";
  let description = [{
    This is the counterpart of llvm.matrix.transpose in MLIR. It serves
    the purposes of more progressive lowering and localized type conversion.
    Higher levels typically lower matrix transpositions into 'vector.transpose'
    operations. Subsequent rewriting rules progressively lower these operations
    into 'vector.flat_transpose' operations to bring the operations closer
    to the hardware ISA.

    The `vector.flat_transpose` op treats the 1-D input `matrix` as
    a 2-D matrix with <rows> rows and <columns> columns, and returns the
    transposed matrix in flattened form in 'res'.

    Note, the corresponding LLVM intrinsic, `@llvm.matrix.transpose.*`, does not
    support scalable vectors. Hence, this Op is only available for fixed-width
    vectors. Also see:

    http://llvm.org/docs/LangRef.html#llvm-matrix-transpose-intrinsic

    Example:

    ```mlir
    %1 = vector.flat_transpose %0 {columns = 4 : i32, rows = 4 : i32}
       : vector<16xf32> -> vector<16xf32>
    ```
  }];
  let assemblyFormat = "$matrix attr-dict `:` type($matrix) `->` type($res)";
}

//===----------------------------------------------------------------------===//
// SplatOp
//===----------------------------------------------------------------------===//

def Vector_SplatOp : Vector_Op<"splat", [
    Pure,
    DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
    TypesMatchWith<"operand type matches element type of result",
                   "aggregate", "input",
                   "::llvm::cast<VectorType>($_self).getElementType()">
  ]> {
  let summary = "vector splat or broadcast operation";
  let description = [{
    Broadcast the operand to all elements of the result vector. The operand is
    required to be of integer/index/float type.

    Example:

    ```mlir
    %s = arith.constant 10.1 : f32
    %t = vector.splat %s : vector<8x16xf32>
    ```
  }];

  let arguments = (ins AnyTypeOf<[AnySignlessInteger, Index, AnyFloat],
                                 "integer/index/float type">:$input);
  let results = (outs AnyVectorOfAnyRank:$aggregate);

  let builders = [
    OpBuilder<(ins "Value":$element, "Type":$aggregateType),
              [{ build($_builder, $_state, aggregateType, element); }]>];
  let assemblyFormat = "$input attr-dict `:` type($aggregate)";

  let hasFolder = 1;
}

//===----------------------------------------------------------------------===//
// VectorScaleOp
//===----------------------------------------------------------------------===//

// TODO: In the future, we might want to have scalable vectors with different
// scales for different dimensions.
// E.g.: vector<[16]x[16]xf32>, in
// which case we might need to add an index to 'vscale' to select one
// of them. In order to support GPUs, we might also want to differentiate
// between a 'global' scale, a scale that's fixed throughout the
// execution, and a 'local' scale that is fixed but might vary with each
// call to the function. For that, it might be useful to have a
// 'vector.scale.global' and a 'vector.scale.local' operation.
def VectorScaleOp : Vector_Op<"vscale",
  [Pure, DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]
> {
  let summary = "Load vector scale size";
  let description = [{
    The `vscale` op returns the scale of the scalable vectors, a positive
    integer value that is constant at runtime but unknown at compile-time.
    The scale of the vector indicates the multiplicity of the vectors and
    vector operations. For example, a `vector<[4]xi32>` is equivalent to
    `vscale` consecutive `vector<4xi32>`; and an operation on a
    `vector<[4]xi32>` is equivalent to performing that operation `vscale`
    times, once on each `<4xi32>` segment of the scalable vector. The `vscale`
    op can be used to calculate the step in vector-length agnostic (VLA) loops.
    Right now we only support one contiguous set of scalable dimensions, all of
    them grouped and scaled with the value returned by 'vscale'.
  }];
  let results = (outs Index:$res);
  let assemblyFormat = "attr-dict";

  let extraClassDefinition = [{
    void $cppClass::getAsmResultNames(
        ::llvm::function_ref<void(mlir::Value, mlir::StringRef)> setNameFn) {
      setNameFn(getResult(), "vscale");
    }
  }];
}

//===----------------------------------------------------------------------===//
// VectorScanOp
//===----------------------------------------------------------------------===//

def Vector_ScanOp :
  Vector_Op<"scan", [Pure,
    AllTypesMatch<["source", "dest"]>,
    AllTypesMatch<["initial_value", "accumulated_value"]> ]>,
    Arguments<(ins Vector_CombiningKindAttr:$kind,
                   AnyVectorOfNonZeroRank:$source,
                   AnyVectorOfAnyRank:$initial_value,
                   I64Attr:$reduction_dim,
                   BoolAttr:$inclusive)>,
    Results<(outs AnyVectorOfNonZeroRank:$dest,
                  AnyVectorOfAnyRank:$accumulated_value)> {
  let summary = "Scan operation";
  let description = [{
    Performs an inclusive/exclusive scan on an n-D vector along a single
    dimension returning an n-D result vector using the given
    operation (`add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui`/`and`/`or`/`xor` for
    integers, and `add`/`mul`/`minnumf`/`maxnumf`/`minimumf`/`maximumf` for
    floats), and a specified value for the initial value. The operator returns
    the result of scan as well as the result of the last reduction in the scan.

    Example:

    ```mlir
    %1:2 = vector.scan <add>, %0, %acc {inclusive = false, reduction_dim = 1 : i64} :
      vector<4x8x16x32xf32>, vector<4x16x32xf32>
    ```
  }];
  let builders = [
    OpBuilder<(ins "Value":$source, "Value":$initial_value,
                   "CombiningKind":$kind,
                   CArg<"int64_t", "0">:$reduction_dim,
                   CArg<"bool", "true">:$inclusive)>
  ];
  let extraClassDeclaration = [{
    VectorType getSourceType() {
      return ::llvm::cast<VectorType>(getSource().getType());
    }
    VectorType getDestType() {
      return ::llvm::cast<VectorType>(getDest().getType());
    }
    VectorType getAccumulatorType() {
      return ::llvm::cast<VectorType>(getAccumulatedValue().getType());
    }
    VectorType getInitialValueType() {
      return ::llvm::cast<VectorType>(getInitialValue().getType());
    }
  }];
  let assemblyFormat =
    "$kind `,` $source `,` $initial_value attr-dict `:` "
    "type($source) `,` type($initial_value) ";
  let hasVerifier = 1;
}

//===----------------------------------------------------------------------===//
// VectorStepOp
//===----------------------------------------------------------------------===//

def Vector_StepOp : Vector_Op<"step", [Pure]> {
  let summary = "A linear sequence of values from 0 to N";
  let description = [{
    A `step` operation produces an index vector, i.e. a 1-D vector of values of
    index type that represents a linear sequence from 0 to N-1, where N is the
    number of elements in the `result` vector.

    Supports fixed-width and scalable vectors.

    Examples:

    ```mlir
    %0 = vector.step : vector<4xindex> ; [0, 1, 2, 3]
    %1 = vector.step : vector<[4]xindex> ; [0, 1, .., <vscale * 4 - 1>]
    ```
  }];
  let results = (outs VectorOfRankAndType<[1], [Index]>:$result);
  let assemblyFormat = "attr-dict `:` type($result)";
}

def Vector_YieldOp : Vector_Op<"yield", [
    Pure, ReturnLike, Terminator]> {
  let summary = "Terminates and yields values from vector regions.";
  let description = [{
    "vector.yield" yields an SSA value from the Vector dialect op region and
    terminates the region. The semantics of how the values are yielded is
    defined by the parent operation.
    If "vector.yield" has any operands, the operands must correspond to the
    parent operation's results.
    If the parent operation defines no value the vector.yield may be omitted
    when printing the region.
  }];

  let arguments = (ins Variadic<AnyType>:$operands);

  let builders = [
    OpBuilder<(ins), [{ /* nothing to do */ }]>,
  ];

  let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?";
}


#endif // MLIR_DIALECT_VECTOR_IR_VECTOR_OPS