xref: /llvm-project/mlir/test/Dialect/Vector/canonicalize.mlir (revision 35df525fd00c2037ef144189ee818b7d612241ff)
1// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file -allow-unregistered-dialect | FileCheck %s
2
// vector.create_mask with constant operands (3, 2) on vector<4x3xi1> is
// expected to canonicalize to vector.constant_mask with the same sizes.
3// CHECK-LABEL: create_vector_mask_to_constant_mask
4func.func @create_vector_mask_to_constant_mask() -> (vector<4x3xi1>) {
5  %c2 = arith.constant 2 : index
6  %c3 = arith.constant 3 : index
7  // CHECK: vector.constant_mask [3, 2] : vector<4x3xi1>
8  %0 = vector.create_mask %c3, %c2 : vector<4x3xi1>
9  return %0 : vector<4x3xi1>
10}
11// -----
12
// A constant bound of -1 on a scalable mask (vector<[8]xi1>) is expected to
// canonicalize to the all-false vector.constant_mask [0].
13// CHECK-LABEL: create_scalable_vector_mask_to_constant_mask
14func.func @create_scalable_vector_mask_to_constant_mask() -> (vector<[8]xi1>) {
15  %c-1 = arith.constant -1 : index
16  // CHECK: vector.constant_mask [0] : vector<[8]xi1>
17  %0 = vector.create_mask %c-1 : vector<[8]xi1>
18  return %0 : vector<[8]xi1>
19}
20
21// -----
22
// A constant bound larger than the dimension (5 on a dimension of size 4) is
// expected to be clamped to the dimension size in the resulting constant_mask.
23// CHECK-LABEL: create_vector_mask_to_constant_mask_truncation
24func.func @create_vector_mask_to_constant_mask_truncation() -> (vector<4x3xi1>) {
25  %c2 = arith.constant 2 : index
26  %c5 = arith.constant 5 : index
27  // CHECK: vector.constant_mask [4, 2] : vector<4x3xi1>
28  %0 = vector.create_mask %c5, %c2 : vector<4x3xi1>
29  return %0 : vector<4x3xi1>
30}
31
32// -----
33
// With one negative constant bound (-2) the whole mask is expected to become
// all-false, i.e. constant_mask [0, 0], even though the other bound (5) is
// larger than its dimension.
34// CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_neg
35func.func @create_vector_mask_to_constant_mask_truncation_neg() -> (vector<4x3xi1>) {
36  %cneg2 = arith.constant -2 : index
37  %c5 = arith.constant 5 : index
38  // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1>
39  %0 = vector.create_mask %c5, %cneg2 : vector<4x3xi1>
40  return %0 : vector<4x3xi1>
41}
42
43// -----
44
// With one zero constant bound the whole mask is expected to become
// all-false, i.e. constant_mask [0, 0], regardless of the other bound (2).
45// CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_zero
46func.func @create_vector_mask_to_constant_mask_truncation_zero() -> (vector<4x3xi1>) {
47  %c2 = arith.constant 2 : index
48  %c0 = arith.constant 0 : index
49  // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1>
50  %0 = vector.create_mask %c0, %c2 : vector<4x3xi1>
51  return %0 : vector<4x3xi1>
52}
53
54// -----
55
// The scalable dimension [16] is bounded by vscale * 16 and the fixed
// dimension 8 by the constant 8; the mask is expected to fold to the
// all-true constant_mask [8, 16].
56// CHECK-LABEL: create_vector_mask_to_constant_mask_scalable_all_true
57func.func @create_vector_mask_to_constant_mask_scalable_all_true() -> (vector<8x[16]xi1>) {
58  %c8 = arith.constant 8 : index
59  %c16 = arith.constant 16 : index
60  %0 = vector.vscale
61  %1 = arith.muli %0, %c16 : index
62  // CHECK: vector.constant_mask [8, 16] : vector<8x[16]xi1>
63  %10 = vector.create_mask %c8, %1 : vector<8x[16]xi1>
64  return %10 : vector<8x[16]xi1>
65}
66
67// -----
68
// A vector.transpose of a vector.create_mask is expected to be rewritten as a
// create_mask whose operands are permuted by [2, 0, 1]; no transpose may
// remain. The original mask is also returned, so it must stay.
69// CHECK-LABEL: create_mask_transpose_to_transposed_create_mask
70//  CHECK-SAME: %[[DIM0:.*]]: index, %[[DIM1:.*]]: index, %[[DIM2:.*]]: index
71func.func @create_mask_transpose_to_transposed_create_mask(
72  %dim0: index, %dim1: index, %dim2: index) -> (vector<2x3x4xi1>, vector<4x2x3xi1>) {
73  //     CHECK: vector.create_mask %[[DIM0]], %[[DIM1]], %[[DIM2]] : vector<2x3x4xi1>
74  //     CHECK: vector.create_mask %[[DIM2]], %[[DIM0]], %[[DIM1]] : vector<4x2x3xi1>
75  // CHECK-NOT: vector.transpose
76  %0 = vector.create_mask %dim0, %dim1, %dim2 : vector<2x3x4xi1>
77  %1 = vector.transpose %0, [2, 0, 1] : vector<2x3x4xi1> to vector<4x2x3xi1>
78  return %0, %1 : vector<2x3x4xi1>, vector<4x2x3xi1>
79}
80
81// -----
82
// Extracting at static position 1 from a create_mask whose leading bound is
// the constant 2 is expected to fold to a create_mask over the remaining
// (scalable) dimensions.
83// CHECK-LABEL: extract_from_create_mask
84//  CHECK-SAME: %[[DIM0:.*]]: index, %[[DIM1:.*]]: index
85func.func @extract_from_create_mask(%dim0: index, %dim1: index) -> vector<[4]x[4]xi1> {
86  %c2 = arith.constant 2 : index
87  %mask = vector.create_mask %c2, %dim0, %dim1 : vector<4x[4]x[4]xi1>
88  // CHECK: vector.create_mask %[[DIM0]], %[[DIM1]] : vector<[4]x[4]xi1>
89  // CHECK-NOT: vector.extract
90  %extract = vector.extract %mask[1] : vector<[4]x[4]xi1> from vector<4x[4]x[4]xi1>
91  return %extract : vector<[4]x[4]xi1>
92}
93
94// -----
95
// Extracting at static position 2 when the leading bound is the constant 2
// (position not below the bound) is expected to fold to an all-false constant.
96// CHECK-LABEL: extract_from_create_mask_all_false
97func.func @extract_from_create_mask_all_false(%dim0: index, %dim1: index) -> vector<[4]x[4]xi1> {
98  %c2 = arith.constant 2 : index
99  %mask = vector.create_mask %c2, %dim0, %dim1 : vector<4x[4]x[4]xi1>
100  // CHECK: arith.constant dense<false> : vector<[4]x[4]xi1>
101  // CHECK-NOT: vector.extract
102  %extract = vector.extract %mask[2] : vector<[4]x[4]xi1> from vector<4x[4]x[4]xi1>
103  return %extract : vector<[4]x[4]xi1>
104}
105
106// -----
107
// Same fold as above, but the extracted (leading) dimension is scalable:
// position 1 with constant bound 3 is expected to leave a create_mask over
// the trailing fixed dimension only.
108// CHECK-LABEL: extract_from_create_mask_leading_scalable
109//  CHECK-SAME: %[[DIM0:.*]]: index
110func.func @extract_from_create_mask_leading_scalable(%dim0: index) -> vector<8xi1> {
111  %c3 = arith.constant 3 : index
112  %mask = vector.create_mask %c3, %dim0 : vector<[4]x8xi1>
113  // CHECK: vector.create_mask %[[DIM0]] : vector<8xi1>
114  // CHECK-NOT: vector.extract
115  %extract = vector.extract %mask[1] : vector<8xi1> from vector<[4]x8xi1>
116  return %extract : vector<8xi1>
117}
118
119// -----
120
// Mixed static/dynamic extract position [2, %index]. The static index 2 is
// below its bound 3, and the dynamically indexed dimension has bound 4, which
// equals the dimension size, so the extract is expected to fold to a
// create_mask over the last dimension.
121// CHECK-LABEL: extract_from_create_mask_dynamic_position
122//  CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
123func.func @extract_from_create_mask_dynamic_position(%dim0: index, %index: index) -> vector<6xi1> {
124  %c4 = arith.constant 4 : index
125  %c3 = arith.constant 3 : index
126  %mask = vector.create_mask %c3, %c4, %dim0 : vector<4x4x6xi1>
127  // CHECK: vector.create_mask %[[DIM0]] : vector<6xi1>
128  // CHECK-NOT: vector.extract
129  %extract = vector.extract %mask[2, %index] : vector<6xi1> from vector<4x4x6xi1>
130  return %extract : vector<6xi1>
131}
132
133// -----
134
// A scalar vector.extract with a static position of -1 is expected to be
// replaced by ub.poison of the scalar type.
135// CHECK-LABEL: @extract_scalar_poison_idx
136func.func @extract_scalar_poison_idx(%a: vector<4x5xf32>) -> f32 {
137  //  CHECK-NOT: vector.extract
138  // CHECK-NEXT: ub.poison : f32
139  %0 = vector.extract %a[-1, 0] : f32 from vector<4x5xf32>
140  return %0 : f32
141}
142
143// -----
144
// A sub-vector vector.extract with a static position of -1 is expected to be
// replaced by ub.poison of the sub-vector type.
145// CHECK-LABEL: @extract_vector_poison_idx
146func.func @extract_vector_poison_idx(%a: vector<4x5xf32>) -> vector<5xf32> {
147  //  CHECK-NOT: vector.extract
148  // CHECK-NEXT: ub.poison : vector<5xf32>
149  %0 = vector.extract %a[-1] : vector<5xf32> from vector<4x5xf32>
150  return %0 : vector<5xf32>
151}
152
153// -----
154
// Several -1 positions in one vector.extract are likewise expected to fold
// to a single ub.poison of the result type.
155// CHECK-LABEL: @extract_multiple_poison_idx
156func.func @extract_multiple_poison_idx(%a: vector<4x5x8xf32>)
157    -> vector<8xf32> {
158  //  CHECK-NOT: vector.extract
159  // CHECK-NEXT: ub.poison : vector<8xf32>
160  %0 = vector.extract %a[-1, -1] : vector<8xf32> from vector<4x5x8xf32>
161  return %0 : vector<8xf32>
162}
163
164// -----
165
// The dynamically indexed dimension has a constant bound of 0, so whatever
// %index is, the extracted slice is expected to fold to an all-false constant.
166// CHECK-LABEL: extract_from_create_mask_dynamic_position_all_false
167//  CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
168func.func @extract_from_create_mask_dynamic_position_all_false(%dim0: index, %index: index) -> vector<6xi1> {
169  %c0 = arith.constant 0 : index
170  %c1 = arith.constant 1 : index
171  %mask = vector.create_mask %c1, %c0, %dim0 : vector<1x4x6xi1>
172  // CHECK: arith.constant dense<false> : vector<6xi1>
173  // CHECK-NOT: vector.extract
174  %extract = vector.extract %mask[0, %index] : vector<6xi1> from vector<1x4x6xi1>
175  return %extract : vector<6xi1>
176}
177
178// -----
179
// Negative test: the dynamically indexed dimension has bound 2 out of 4, so
// the result depends on %index and the IR is expected to stay unchanged.
180// CHECK-LABEL: extract_from_create_mask_dynamic_position_unknown
181//  CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
182func.func @extract_from_create_mask_dynamic_position_unknown(%dim0: index, %index: index) -> vector<6xi1> {
183  %c2 = arith.constant 2 : index
184  %mask = vector.create_mask %c2, %dim0 : vector<4x6xi1>
185  // CHECK: %[[C2:.*]] = arith.constant 2 : index
186  // CHECK-NEXT: %[[MASK:.*]] = vector.create_mask %[[C2]], %[[DIM0]] : vector<4x6xi1>
187  // CHECK-NEXT: vector.extract %[[MASK]][%[[INDEX]]] : vector<6xi1> from vector<4x6xi1>
188  %extract = vector.extract %mask[%index] : vector<6xi1> from vector<4x6xi1>
189  return %extract : vector<6xi1>
190}
191
192// -----
193
// Negative test with a mixed position [1, %index0]: the dynamically indexed
// dimension has bound 2 out of 4, so the IR is expected to stay unchanged.
194// CHECK-LABEL: extract_from_create_mask_mixed_position_unknown
195//  CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
196func.func @extract_from_create_mask_mixed_position_unknown(%dim0: index, %index0: index) -> vector<4xi1> {
197  %c2 = arith.constant 2 : index
198  %mask = vector.create_mask %c2, %c2, %dim0 : vector<2x4x4xi1>
199  // CHECK: %[[C2:.*]] = arith.constant 2 : index
200  // CHECK-NEXT: %[[MASK:.*]] = vector.create_mask %[[C2]], %[[C2]], %[[DIM0]] : vector<2x4x4xi1>
201  // CHECK-NEXT: vector.extract %[[MASK]][1, %[[INDEX]]] : vector<4xi1> from vector<2x4x4xi1>
202  %extract = vector.extract %mask[1, %index0] : vector<4xi1> from vector<2x4x4xi1>
203  return %extract : vector<4xi1>
204}
205
206// -----
207
// Negative test: the bound of the extracted dimension is not a constant, so
// both the create_mask and the extract are expected to remain.
208// CHECK-LABEL: extract_from_non_constant_create_mask
209//  CHECK-SAME: %[[DIM0:.*]]: index
210func.func @extract_from_non_constant_create_mask(%dim0: index) -> vector<[2]xi1> {
211  %mask = vector.create_mask %dim0, %dim0 : vector<[2]x[2]xi1>
212  // CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM0]], %[[DIM0]] : vector<[2]x[2]xi1>
213  // CHECK-NEXT: vector.extract %[[MASK]][0] : vector<[2]xi1> from vector<[2]x[2]xi1>
214  %extract = vector.extract %mask[0] : vector<[2]xi1> from vector<[2]x[2]xi1>
215  return %extract : vector<[2]xi1>
216}
217
218// -----
219
// A vector.transpose of a vector.constant_mask is expected to be rewritten as
// a constant_mask whose sizes are permuted by [2, 0, 1]; no transpose may
// remain. The original mask is also returned, so it must stay.
220// CHECK-LABEL: constant_mask_transpose_to_transposed_constant_mask
221func.func @constant_mask_transpose_to_transposed_constant_mask() -> (vector<2x3x4xi1>, vector<4x2x3xi1>) {
222  //     CHECK: vector.constant_mask [1, 2, 3] : vector<2x3x4xi1>
223  //     CHECK: vector.constant_mask [3, 1, 2] : vector<4x2x3xi1>
224  // CHECK-NOT: vector.transpose
225  %0 = vector.constant_mask [1, 2, 3] : vector<2x3x4xi1>
226  %1 = vector.transpose %0, [2, 0, 1] : vector<2x3x4xi1> to vector<4x2x3xi1>
227  return %0, %1 : vector<2x3x4xi1>, vector<4x2x3xi1>
228}
229
230// -----
231
// The seven splits below all slice vector.constant_mask [2, 2] :
// vector<4x3xi1> with vector.extract_strided_slice at different offsets and
// sizes, and expect the slice to fold to a constant_mask of the result type.
// Every split carries its own label so that each expected constant_mask is
// matched only against the output of its own function.

// Offsets [0, 0]: the slice still covers the full 2x2 set region.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
232func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
233  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
234  %1 = vector.extract_strided_slice %0
235    {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]}
236      : vector<4x3xi1> to vector<2x2xi1>
237  // CHECK: vector.constant_mask [2, 2] : vector<2x2xi1>
238  return %1 : vector<2x2xi1>
239}
240
241// -----
242
// Offsets [1, 0]: one set row remains.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
243func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
244  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
245  %1 = vector.extract_strided_slice %0
246    {offsets = [1, 0], sizes = [2, 2], strides = [1, 1]}
247      : vector<4x3xi1> to vector<2x2xi1>
248  // CHECK: vector.constant_mask [1, 2] : vector<2x2xi1>
249  return %1 : vector<2x2xi1>
250}
251
252// -----
253
// Offsets [0, 1]: one set column remains.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
254func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
255  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
256  %1 = vector.extract_strided_slice %0
257    {offsets = [0, 1], sizes = [2, 2], strides = [1, 1]}
258      : vector<4x3xi1> to vector<2x2xi1>
259  // CHECK: vector.constant_mask [2, 1] : vector<2x2xi1>
260  return %1 : vector<2x2xi1>
261}
262
263// -----
264
// Offsets [2, 0]: the slice starts past the set rows, so it is all-false.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
265func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
266  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
267  %1 = vector.extract_strided_slice %0
268    {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]}
269      : vector<4x3xi1> to vector<2x2xi1>
270  // CHECK: vector.constant_mask [0, 0] : vector<2x2xi1>
271  return %1 : vector<2x2xi1>
272}
273
274// -----
275
// Offsets [0, 2]: the slice starts past the set columns, so it is all-false.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
276func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
277  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
278  %1 = vector.extract_strided_slice %0
279    {offsets = [0, 2], sizes = [2, 1], strides = [1, 1]}
280      : vector<4x3xi1> to vector<2x1xi1>
281  // CHECK: vector.constant_mask [0, 0] : vector<2x1xi1>
282  return %1 : vector<2x1xi1>
283}
284
285// -----
286
// Offsets [0, 1] with a 2x1 slice: the single extracted column is fully set.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
287func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
288  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
289  %1 = vector.extract_strided_slice %0
290    {offsets = [0, 1], sizes = [2, 1], strides = [1, 1]}
291      : vector<4x3xi1> to vector<2x1xi1>
292  // CHECK: vector.constant_mask [2, 1] : vector<2x1xi1>
293  return %1 : vector<2x1xi1>
294}
295
296// -----
297
// Offsets [1, 1] with a 2x1 slice: only the first extracted row is set.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
298func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
299  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
300  %1 = vector.extract_strided_slice %0
301    {offsets = [1, 1], sizes = [2, 1], strides = [1, 1]}
302      : vector<4x3xi1> to vector<2x1xi1>
303  // CHECK: vector.constant_mask [1, 1] : vector<2x1xi1>
304  return %1 : vector<2x1xi1>
305}
306
307// -----
308
// An extract_strided_slice that covers the whole source (zero offsets, full
// sizes, unit strides) is expected to fold to its operand.
309// CHECK-LABEL: extract_strided_fold
310//  CHECK-SAME: (%[[ARG:.*]]: vector<4x3xi1>)
311//  CHECK-NEXT:   return %[[ARG]] : vector<4x3xi1>
312func.func @extract_strided_fold(%arg : vector<4x3xi1>) -> (vector<4x3xi1>) {
313  %0 = vector.extract_strided_slice %arg
314    {offsets = [0, 0], sizes = [4, 3], strides = [1, 1]}
315      : vector<4x3xi1> to vector<4x3xi1>
316  return %0 : vector<4x3xi1>
317}
318
319// -----
320
// Case where the extracted slice is exactly the slice that was inserted
// (same offsets, same 4x4 shape): the extract is expected to fold to the
// inserted value.
321// CHECK-LABEL: extract_strided_fold_insert
322//  CHECK-SAME: (%[[ARG:.*]]: vector<4x4xf32>
323//  CHECK-NEXT:   return %[[ARG]] : vector<4x4xf32>
324func.func @extract_strided_fold_insert(%a: vector<4x4xf32>, %b: vector<8x16xf32>)
325  -> (vector<4x4xf32>) {
326  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
327    : vector<4x4xf32> into vector<8x16xf32>
328  %1 = vector.extract_strided_slice %0
329    {offsets = [2, 2], sizes = [4, 4], strides = [1, 1]}
330      : vector<8x16xf32> to vector<4x4xf32>
331  return %1 : vector<4x4xf32>
332}
333
334// -----
335
336// Case where the vector extracted (4x4) is a strict subset of the vector
// inserted (6x4): the extract is expected to be rewritten to read directly
// from the inserted value at offsets relative to it.
337// CHECK-LABEL: extract_strided_fold_insert
338//  CHECK-SAME: (%[[ARG0:.*]]: vector<6x4xf32>
339//  CHECK-NEXT:   %[[EXT:.*]] = vector.extract_strided_slice %[[ARG0]]
340//  CHECK-SAME:     {offsets = [0, 0], sizes = [4, 4], strides = [1, 1]}
341//  CHECK-SAME:       : vector<6x4xf32> to vector<4x4xf32>
342//  CHECK-NEXT:   return %[[EXT]] : vector<4x4xf32>
343func.func @extract_strided_fold_insert(%a: vector<6x4xf32>, %b: vector<8x16xf32>)
344  -> (vector<4x4xf32>) {
345  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
346    : vector<6x4xf32> into vector<8x16xf32>
347  %1 = vector.extract_strided_slice %0
348    {offsets = [2, 2], sizes = [4, 4], strides = [1, 1]}
349      : vector<8x16xf32> to vector<4x4xf32>
350  return %1 : vector<4x4xf32>
351}
352
353// -----
354
355// Negative test where the extract (6x4) is not a subset of the element
// inserted (4x4): both the insert and the extract are expected to remain.
356// CHECK-LABEL: extract_strided_fold_negative
357//  CHECK-SAME: (%[[ARG0:.*]]: vector<4x4xf32>, %[[ARG1:.*]]: vector<8x16xf32>
358//       CHECK:   %[[INS:.*]] = vector.insert_strided_slice %[[ARG0]], %[[ARG1]]
359//  CHECK-SAME:     {offsets = [2, 2], strides = [1, 1]}
360//  CHECK-SAME:       : vector<4x4xf32> into vector<8x16xf32>
361//       CHECK:   %[[EXT:.*]] = vector.extract_strided_slice %[[INS]]
362//  CHECK-SAME:     {offsets = [2, 2], sizes = [6, 4], strides = [1, 1]}
363//  CHECK-SAME:       : vector<8x16xf32> to vector<6x4xf32>
364//  CHECK-NEXT:   return %[[EXT]] : vector<6x4xf32>
365func.func @extract_strided_fold_negative(%a: vector<4x4xf32>, %b: vector<8x16xf32>)
366  -> (vector<6x4xf32>) {
367  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
368    : vector<4x4xf32> into vector<8x16xf32>
369  %1 = vector.extract_strided_slice %0
370    {offsets = [2, 2], sizes = [6, 4], strides = [1, 1]}
371      : vector<8x16xf32> to vector<6x4xf32>
372  return %1 : vector<6x4xf32>
373}
374
375// -----
376
377// Case where we need to look through two levels of insert_strided_slice:
// the outer insert (row 1) is disjoint from the extracted element [0, 1], so
// the extract is expected to read from the inner inserted value instead.
378// CHECK-LABEL: extract_strided_fold_insert
379//  CHECK-SAME: (%[[ARG0:.*]]: vector<2x8xf32>, %[[ARG1:.*]]: vector<1x4xf32>,
380//  CHECK-NEXT:   %[[EXT:.*]] = vector.extract_strided_slice %[[ARG1]]
381//  CHECK-SAME:     {offsets = [0, 0], sizes = [1, 1], strides = [1, 1]}
382//  CHECK-SAME:       : vector<1x4xf32> to vector<1x1xf32>
383//  CHECK-NEXT:   return %[[EXT]] : vector<1x1xf32>
384func.func @extract_strided_fold_insert(%a: vector<2x8xf32>, %b: vector<1x4xf32>,
385                                  %c : vector<1x4xf32>) -> (vector<1x1xf32>) {
386  %0 = vector.insert_strided_slice %b, %a {offsets = [0, 1], strides = [1, 1]}
387    : vector<1x4xf32> into vector<2x8xf32>
388  %1 = vector.insert_strided_slice %c, %0 {offsets = [1, 0], strides = [1, 1]}
389    : vector<1x4xf32> into vector<2x8xf32>
390  %2 = vector.extract_strided_slice %1
391      {offsets = [0, 1], sizes = [1, 1], strides = [1, 1]}
392        : vector<2x8xf32> to vector<1x1xf32>
393  return %2 : vector<1x1xf32>
394}
395
396// -----
397
// A 1-D transpose with the identity permutation [0] is expected to fold to
// its operand.
398// CHECK-LABEL: transpose_1D_identity
399// CHECK-SAME: ([[ARG:%.*]]: vector<4xf32>)
400func.func @transpose_1D_identity(%arg : vector<4xf32>) -> vector<4xf32> {
401  // CHECK-NOT: transpose
402  %0 = vector.transpose %arg, [0] : vector<4xf32> to vector<4xf32>
403  // CHECK-NEXT: return [[ARG]]
404  return %0 : vector<4xf32>
405}
406
407// -----
408
// A 2-D transpose with the identity permutation [0, 1] is expected to fold
// to its operand.
409// CHECK-LABEL: transpose_2D_identity
410// CHECK-SAME: ([[ARG:%.*]]: vector<4x3xf32>)
411func.func @transpose_2D_identity(%arg : vector<4x3xf32>) -> vector<4x3xf32> {
412  // CHECK-NOT: transpose
413  %0 = vector.transpose %arg, [0, 1] : vector<4x3xf32> to vector<4x3xf32>
414  // CHECK-NEXT: return [[ARG]]
415  return %0 : vector<4x3xf32>
416}
417
418// -----
419
// A 3-D transpose with the identity permutation [0, 1, 2] is expected to
// fold to its operand.
420// CHECK-LABEL: transpose_3D_identity
421// CHECK-SAME: ([[ARG:%.*]]: vector<4x3x2xf32>)
422func.func @transpose_3D_identity(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> {
423  // CHECK-NOT: transpose
424  %0 = vector.transpose %arg, [0, 1, 2] : vector<4x3x2xf32> to vector<4x3x2xf32>
425  // CHECK-NEXT: return [[ARG]]
426  return %0 : vector<4x3x2xf32>
427}
428
429// -----
430
// A chain of 2-D transposes whose composition is the identity
// ([1, 0] twice, interleaved with identity transposes) is expected to fold
// away completely, so the addf operates on the function argument directly.
431// CHECK-LABEL: transpose_2D_sequence
432// CHECK-SAME: ([[ARG:%.*]]: vector<4x3xf32>)
433func.func @transpose_2D_sequence(%arg : vector<4x3xf32>) -> vector<4x3xf32> {
434  // CHECK-NOT: transpose
435  %0 = vector.transpose %arg, [1, 0] : vector<4x3xf32> to vector<3x4xf32>
436  %1 = vector.transpose %0, [0, 1] : vector<3x4xf32> to vector<3x4xf32>
437  %2 = vector.transpose %1, [1, 0] : vector<3x4xf32> to vector<4x3xf32>
438  %3 = vector.transpose %2, [0, 1] : vector<4x3xf32> to vector<4x3xf32>
439  // CHECK: [[ADD:%.*]] = arith.addf [[ARG]], [[ARG]]
440  %4 = arith.addf %2, %3 : vector<4x3xf32>
441  // CHECK-NEXT: return [[ADD]]
442  return %4 : vector<4x3xf32>
443}
444
445// -----
446
// Chains of 3-D transposes are expected to be composed into a single
// transpose of the function argument, or removed entirely when the composed
// permutation is the identity (%2 and %6 both compose back to %arg).
// All directives refer to the argument through the captured [[ARG]] variable
// rather than a printed SSA name, so the test does not depend on how the
// printer names block arguments.
447// CHECK-LABEL: transpose_3D_sequence
448// CHECK-SAME: ([[ARG:%.*]]: vector<4x3x2xf32>)
449func.func @transpose_3D_sequence(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> {
450  // CHECK: [[T0:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
451  %0 = vector.transpose %arg, [1, 2, 0] : vector<4x3x2xf32> to vector<3x2x4xf32>
452  %1 = vector.transpose %0, [1, 0, 2] : vector<3x2x4xf32> to vector<2x3x4xf32>
453  // CHECK: [[T1:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
454  %2 = vector.transpose %1, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
455  %3 = vector.transpose %2, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
456  // CHECK: [[MUL:%.*]] = arith.mulf [[T0]], [[T1]]
457  %4 = arith.mulf %1, %3 : vector<2x3x4xf32>
458  // CHECK: [[T5:%.*]] = vector.transpose [[MUL]], [2, 1, 0]
459  %5 = vector.transpose %4, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
460  // CHECK-NOT: transpose
461  %6 = vector.transpose %3, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
462  // CHECK: [[ADD:%.*]] = arith.addf [[T5]], [[ARG]]
463  %7 = arith.addf %5, %6 : vector<4x3x2xf32>
464  // CHECK-NEXT: return [[ADD]]
465  return %7 : vector<4x3x2xf32>
466}
467
468// -----
469
// A memref.cast from a static to a dynamic shape feeding transfer_read and
// transfer_write is expected to be folded away: both transfers should use the
// static memref<4x8xf32> type and be marked in_bounds on both dimensions.
470// CHECK-LABEL: cast_transfers
471func.func @cast_transfers(%A: memref<4x8xf32>) -> (vector<4x8xf32>) {
472  %c0 = arith.constant 0 : index
473  %f0 = arith.constant 0.0 : f32
474  %0 = memref.cast %A : memref<4x8xf32> to memref<?x?xf32>
475
476  // CHECK: vector.transfer_read %{{.*}} {in_bounds = [true, true]} : memref<4x8xf32>, vector<4x8xf32>
477  %1 = vector.transfer_read %0[%c0, %c0], %f0 : memref<?x?xf32>, vector<4x8xf32>
478
479  // CHECK: vector.transfer_write %{{.*}} {in_bounds = [true, true]} : vector<4x8xf32>, memref<4x8xf32>
480  vector.transfer_write %1, %0[%c0, %c0] : vector<4x8xf32>, memref<?x?xf32>
481  return %1 : vector<4x8xf32>
482}
483
484// -----
485
// Tensor variant: a tensor.cast from a static to a dynamic shape feeding a
// transfer_read is expected to be folded away, leaving a read of the static
// tensor<4x8xf32> marked in_bounds on both dimensions.
486// CHECK-LABEL: cast_transfers
487func.func @cast_transfers(%A: tensor<4x8xf32>) -> (vector<4x8xf32>) {
488  %c0 = arith.constant 0 : index
489  %f0 = arith.constant 0.0 : f32
490  %0 = tensor.cast %A : tensor<4x8xf32> to tensor<?x?xf32>
491
492  // CHECK: vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor<4x8xf32>, vector<4x8xf32>
493  %1 = vector.transfer_read %0[%c0, %c0], %f0 : tensor<?x?xf32>, vector<4x8xf32>
494
495  return %1 : vector<4x8xf32>
496}
497
498// -----
499
// Scalar extracts are expected to be traced back through chains of
// vector.insert and 2-D vector.transpose to the scalar that was inserted at
// the corresponding position, so the whole body folds to a single return.
500// CHECK-LABEL: func @insert_extract_transpose_2d(
501//  CHECK-SAME: %[[V:[a-zA-Z0-9]*]]: vector<2x3xf32>,
502//  CHECK-SAME: %[[F0:[a-zA-Z0-9]*]]: f32,
503//  CHECK-SAME: %[[F1:[a-zA-Z0-9]*]]: f32,
504//  CHECK-SAME: %[[F2:[a-zA-Z0-9]*]]: f32,
505//  CHECK-SAME: %[[F3:[a-zA-Z0-9]*]]: f32
506func.func @insert_extract_transpose_2d(
507    %v: vector<2x3xf32>, %f0: f32, %f1: f32, %f2: f32, %f3: f32)
508-> (f32, f32, f32)
509{
510  %0 = vector.insert %f0, %v[0, 0] : f32 into vector<2x3xf32>
511  %1 = vector.insert %f1, %0[0, 1] : f32 into vector<2x3xf32>
512  %2 = vector.insert %f2, %1[1, 0] : f32 into vector<2x3xf32>
513  %3 = vector.insert %f3, %2[1, 1] : f32 into vector<2x3xf32>
514  %4 = vector.transpose %3, [1, 0] : vector<2x3xf32> to vector<3x2xf32>
515  %5 = vector.insert %f3, %4[1, 0] : f32 into vector<3x2xf32>
516  %6 = vector.transpose %5, [1, 0] : vector<3x2xf32> to vector<2x3xf32>
517
518  // Expected %f2 from %2 = vector.insert %f2, %1[1, 0].
519  %r1 = vector.extract %3[1, 0] : f32 from vector<2x3xf32>
520
521  // Expected %f1 from %1 = vector.insert %f1, %0[0, 1] followed by
522  // transpose [1, 0].
523  %r2 = vector.extract %4[1, 0] : f32 from vector<3x2xf32>
524
525  // Expected %f2 from %2 = vector.insert %f2, %1[1, 0] followed by double
526  // transpose [1, 0].
527  %r3 = vector.extract %6[1, 0] : f32 from vector<2x3xf32>
528
529  // CHECK-NEXT: return %[[F2]], %[[F1]], %[[F2]] : f32, f32, f32
530  return %r1, %r2, %r3 : f32, f32, f32
531}
532
533// -----
534
// Exercises the different relations between an extract position and the
// positions of the vector.insert ops that produced the source: equal,
// insert position a prefix of the extract position, extract position a
// prefix of the insert position, and disjoint.
535// CHECK-LABEL: insert_extract_chain
536//  CHECK-SAME: %[[V234:[a-zA-Z0-9]*]]: vector<2x3x4xf32>
537//  CHECK-SAME: %[[V34:[a-zA-Z0-9]*]]: vector<3x4xf32>
538//  CHECK-SAME: %[[V4:[a-zA-Z0-9]*]]: vector<4xf32>
539func.func @insert_extract_chain(%v234: vector<2x3x4xf32>, %v34: vector<3x4xf32>, %v4: vector<4xf32>)
540    -> (vector<4xf32>, vector<4xf32>, vector<3x4xf32>, vector<3x4xf32>) {
541  // CHECK-NEXT: %[[A34:.*]] = vector.insert
542  %A34 = vector.insert %v34, %v234[0]: vector<3x4xf32> into vector<2x3x4xf32>
543  // CHECK-NEXT: %[[B34:.*]] = vector.insert
544  %B34 = vector.insert %v34, %A34[1]: vector<3x4xf32> into vector<2x3x4xf32>
545  // CHECK-NEXT: %[[A4:.*]] = vector.insert
546  %A4 = vector.insert %v4, %B34[1, 0]: vector<4xf32> into vector<2x3x4xf32>
547  // CHECK-NEXT: %[[B4:.*]] = vector.insert
548  %B4 = vector.insert %v4, %A4[1, 1]: vector<4xf32> into vector<2x3x4xf32>
549
550  // Case 2.a. [1, 1] == insertpos ([1, 1])
551  // Match %B4 insertionpos and fold to its source(i.e. %V4).
552   %r0 = vector.extract %B4[1, 1]: vector<4xf32> from vector<2x3x4xf32>
553
554  // Case 3.a. insertpos ([1]) is a prefix of [1, 0].
555  // Traverse %B34 to its source(i.e. %V34@[*0*]).
556  // CHECK-NEXT: %[[R1:.*]] = vector.extract %[[V34]][0]
557   %r1 = vector.extract %B34[1, 0]: vector<4xf32> from vector<2x3x4xf32>
558
559  // Case 4. [1] is a prefix of insertpos ([1, 1]).
560  // Cannot traverse %B4.
561  // CHECK-NEXT: %[[R2:.*]] = vector.extract %[[B4]][1]
562   %r2 = vector.extract %B4[1]: vector<3x4xf32> from vector<2x3x4xf32>
563
564  // Case 5. [0] is disjoint from insertpos ([1, 1]).
565  // Traverse %B4 to its dest(i.e. %A4@[0]).
566  // Traverse %A4 to its dest(i.e. %B34@[0]).
567  // Traverse %B34 to its dest(i.e. %A34@[0]).
568  // Match %A34 insertionpos and fold to its source(i.e. %V34).
569   %r3 = vector.extract %B4[0]: vector<3x4xf32> from vector<2x3x4xf32>
570
571  // CHECK: return %[[V4]], %[[R1]], %[[R2]], %[[V34]]
572  return %r0, %r1, %r2, %r3:
573    vector<4xf32>, vector<4xf32>, vector<3x4xf32>, vector<3x4xf32>
574}
575
576// -----
577
// Four independent transpose -> insert -> transpose -> extract chains on the
// same 3-D argument. Only the first chain is expected to fold back to an
// extract from the argument; the other three must keep their
// transpose/insert/transpose sequence.
578// CHECK-LABEL: func @insert_extract_transpose_3d(
579//  CHECK-SAME: %[[V234:[a-zA-Z0-9]*]]: vector<2x3x4xf32>
580func.func @insert_extract_transpose_3d(
581  %v234: vector<2x3x4xf32>, %v43: vector<4x3xf32>, %f0: f32)
582    -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<3x4xf32>) {
583
584  %a432 = vector.transpose %v234, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
585  %b432 = vector.insert %f0, %a432[0, 0, 1] : f32 into vector<4x3x2xf32>
586  %c234 = vector.transpose %b432, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
587  // Case 1. %c234 = transpose [2,1,0] posWithSentinels [1,2,-1] -> [-1,2,1]
588  // Case 5. %b432 = insert [0,0,1] (inter([.,2,1], [.,0,1]) == 0) prop to %a432
589  // Case 1. %a432 = transpose [2,1,0] posWithSentinels [-1,2,1] -> [1,2,-1]
590  // can extract directly from %v234, the rest folds.
591  // CHECK: %[[R0:.*]] = vector.extract %[[V234]][1, 2]
592  %r0 = vector.extract %c234[1, 2] : vector<4xf32> from vector<2x3x4xf32>
593
594  // CHECK-NEXT: vector.transpose
595  // CHECK-NEXT: vector.insert
596  // CHECK-NEXT: %[[F234:.*]] = vector.transpose
597  %d432 = vector.transpose %v234, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
598  %e432 = vector.insert %f0, %d432[0, 2, 1] : f32 into vector<4x3x2xf32>
599  %f234 = vector.transpose %e432, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
600  // Case 1. %f234 = transpose [2,1,0] posWithSentinels [1,2,-1] -> [-1,2,1]
601  // Case 4. %e432 = insert [0,2,1] (inter([.,2,1], [.,2,1]) != 0)
602  // Bail, cannot do better than the current.
603  // CHECK: %[[R1:.*]] = vector.extract %[[F234]]
604  %r1 = vector.extract %f234[1, 2] : vector<4xf32> from vector<2x3x4xf32>
605
606  // CHECK-NEXT: vector.transpose
607  // CHECK-NEXT: vector.insert
608  // CHECK-NEXT: %[[H234:.*]] = vector.transpose
609  %g243 = vector.transpose %v234, [0, 2, 1] : vector<2x3x4xf32> to vector<2x4x3xf32>
610  %h243 = vector.insert %v43, %g243[0] : vector<4x3xf32> into vector<2x4x3xf32>
611  %i234 = vector.transpose %h243, [0, 2, 1] : vector<2x4x3xf32> to vector<2x3x4xf32>
612  // Case 1. %i234 = transpose [0,2,1] posWithSentinels [0,-1,-2] -> [0,-2,-1]
613  // Case 3.b. %h243 = insert [0] is prefix of [0,.,.] but internal transpose.
614  // Bail, cannot do better than the current.
615  // CHECK: %[[R2:.*]] = vector.extract %[[H234]][0, 1]
616  %r2 = vector.extract %i234[0, 1] : vector<4xf32> from vector<2x3x4xf32>
617
618  // CHECK-NEXT: vector.transpose
619  // CHECK-NEXT: vector.insert
620  // CHECK-NEXT: %[[K234:.*]] = vector.transpose
621  %j243 = vector.transpose %v234, [0, 2, 1] : vector<2x3x4xf32> to vector<2x4x3xf32>
622  %k243 = vector.insert %v43, %j243[0] : vector<4x3xf32> into vector<2x4x3xf32>
623  %l234 = vector.transpose %k243, [0, 2, 1] : vector<2x4x3xf32> to vector<2x3x4xf32>
624  // Case 1. %l234 = transpose [0,2,1] posWithSentinels [0,-1,-2] -> [0,-2,-1]
625  // Case 2.b. %k243 = insert [0] == [0,.,.] but internal transpose.
626  // Bail, cannot do better than the current.
627  // CHECK: %[[R3:.*]] = vector.extract %[[K234]][0]
628  %r3 = vector.extract %l234[0] : vector<3x4xf32> from vector<2x3x4xf32>
629
630  // CHECK-NEXT: return %[[R0]], %[[R1]], %[[R2]], %[[R3]]
631  return %r0, %r1, %r2, %r3: vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<3x4xf32>
632}
633
634// -----
635
// A chain of vector.extract ops ([0] -> [1, 2] -> [3]) is expected to be
// merged into one extract with the concatenated position [0, 1, 2, 3]; a
// single-level extract is left as is.
636// CHECK-LABEL: fold_extracts
637//  CHECK-SAME:   %[[A:[a-zA-Z0-9]*]]: vector<3x4x5x6xf32>
638func.func @fold_extracts(%a : vector<3x4x5x6xf32>) -> (f32, vector<4x5x6xf32>) {
639  %b = vector.extract %a[0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
640  %c = vector.extract %b[1, 2] : vector<6xf32> from vector<4x5x6xf32>
641  //  CHECK-NEXT: vector.extract %[[A]][0, 1, 2, 3] : f32 from vector<3x4x5x6xf32>
642  %d = vector.extract %c[3] : f32 from vector<6xf32>
643
644  //  CHECK-NEXT: vector.extract %[[A]][0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
645  %e = vector.extract %a[0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
646
647  //  CHECK-NEXT: return
648  return %d, %e : f32, vector<4x5x6xf32>
649}
650
651// -----
652
// An extract from a transpose is expected to fold to an extract from the
// transpose source (with the position mapped through the inverse
// permutation) when the transpose leaves the extracted minor dimension in
// place; otherwise the transpose must stay.
653// CHECK-LABEL: fold_extract_transpose
654//  CHECK-SAME:   %[[A:[a-zA-Z0-9]*]]: vector<3x4x5x6xf32>
655//  CHECK-SAME:   %[[B:[a-zA-Z0-9]*]]: vector<3x6x5x6xf32>
656func.func @fold_extract_transpose(
657    %a : vector<3x4x5x6xf32>, %b : vector<3x6x5x6xf32>) -> (
658      vector<6xf32>, vector<6xf32>, vector<6xf32>) {
659  // [3] is a proper most minor identity map in transpose.
660  // The permutation is its own inverse, so we have:
661  // [0, 2, 1] ^ -1 o [0, 1, 2] = [0, 2, 1] o [0, 1, 2]
662  //                            = [0, 2, 1]
663  //  CHECK-NEXT: vector.extract %[[A]][0, 2, 1] : vector<6xf32> from vector<3x4x5x6xf32>
664  %0 = vector.transpose %a, [0, 2, 1, 3] : vector<3x4x5x6xf32> to vector<3x5x4x6xf32>
665  %1 = vector.extract %0[0, 1, 2] : vector<6xf32> from vector<3x5x4x6xf32>
666
667  // [3] is a proper most minor identity map in transpose.
668  // The permutation is not its own inverse, so we have:
669  // [1, 2, 0] ^ -1 o [0, 1, 2] = [2, 0, 1] o [0, 1, 2]
670  //                            = [2, 0, 1]
671  //  CHECK-NEXT: vector.extract %[[A]][2, 0, 1] : vector<6xf32> from vector<3x4x5x6xf32>
672  %2 = vector.transpose %a, [1, 2, 0, 3] : vector<3x4x5x6xf32> to vector<4x5x3x6xf32>
673  %3 = vector.extract %2[0, 1, 2] : vector<6xf32> from vector<4x5x3x6xf32>
674
675  // Not a minor identity map, so the intra-vector level has been permuted
  // and the transpose cannot be folded into the extract.
676  //  CHECK-NEXT: vector.transpose %[[B]], [0, 2, 3, 1]
677  //  CHECK-NEXT: vector.extract %{{.*}}[0, 1, 2]
678  %4 = vector.transpose %b, [0, 2, 3, 1] : vector<3x6x5x6xf32> to vector<3x5x6x6xf32>
679  %5 = vector.extract %4[0, 1, 2] : vector<6xf32> from vector<3x5x6x6xf32>
680
681  return %1, %3, %5 : vector<6xf32>, vector<6xf32>, vector<6xf32>
682}
683
684// -----
685
// Extracting a scalar from a broadcast of a scalar is expected to fold to
// the broadcast source.
686// CHECK-LABEL: fold_extract_broadcast
687//  CHECK-SAME:   %[[A:.*]]: f32
688//       CHECK:   return %[[A]] : f32
689func.func @fold_extract_broadcast(%a : f32) -> f32 {
690  %b = vector.broadcast %a : f32 to vector<1x2x4xf32>
691  %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
692  return %r : f32
693}
694
695// -----
696
// Extracting a scalar from a broadcast of a 0-D vector is expected to become
// a vector.extractelement on the 0-D source.
697// CHECK-LABEL: fold_extract_broadcast_0dvec
698//  CHECK-SAME:   %[[A:.*]]: vector<f32>
699//       CHECK:   %[[B:.+]] = vector.extractelement %[[A]][] : vector<f32>
700//       CHECK:   return %[[B]] : f32
701func.func @fold_extract_broadcast_0dvec(%a : vector<f32>) -> f32 {
702  %b = vector.broadcast %a : vector<f32> to vector<1x2x4xf32>
703  %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
704  return %r : f32
705}
706
707// -----
708
// Negative test: the broadcast source vector<1x1xf32> is stretched along the
// extracted minor dimension (1 -> 4), so both the broadcast and the extract
// are expected to remain.
709// CHECK-LABEL: fold_extract_broadcast_negative
710//       CHECK:   vector.broadcast %{{.*}} : vector<1x1xf32> to vector<1x1x4xf32>
711//       CHECK:   vector.extract %{{.*}}[0, 0] : vector<4xf32> from vector<1x1x4xf32>
712func.func @fold_extract_broadcast_negative(%a : vector<1x1xf32>) -> vector<4xf32> {
713  %b = vector.broadcast %a : vector<1x1xf32> to vector<1x1x4xf32>
714  %r = vector.extract %b[0, 0] : vector<4xf32> from vector<1x1x4xf32>
715  return %r : vector<4xf32>
716}
717
718// -----
719
// Extracting a scalar from a vector.splat is expected to fold to the splat
// operand.
720// CHECK-LABEL: fold_extract_splat
721//  CHECK-SAME:   %[[A:.*]]: f32
722//       CHECK:   return %[[A]] : f32
723func.func @fold_extract_splat(%a : f32) -> f32 {
724  %b = vector.splat %a : vector<1x2x4xf32>
725  %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
726  return %r : f32
727}
728
729// -----
730
// Extracting exactly the broadcast source shape (vector<4xf32>) from a
// broadcast is expected to fold to the broadcast source.
731// CHECK-LABEL: fold_extract_broadcast_vector
732//  CHECK-SAME:   %[[A:.*]]: vector<4xf32>
733//       CHECK:   return %[[A]] : vector<4xf32>
734func.func @fold_extract_broadcast_vector(%a : vector<4xf32>) -> vector<4xf32> {
735  %b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32>
736  %r = vector.extract %b[0, 1] : vector<4xf32> from vector<1x2x4xf32>
737  return %r : vector<4xf32>
738}
739
740// -----
741
// Vector source, scalar result: the extract at [0, 1, 2] of the broadcast is
// expected to become an extract at [2] of the original vector<4xf32>.
// NOTE(review): this function name is reused by several cases in this file;
// each lives in its own split-input chunk.
742// CHECK-LABEL: fold_extract_broadcast
743//  CHECK-SAME:   %[[A:.*]]: vector<4xf32>
744//       CHECK:   %[[R:.*]] = vector.extract %[[A]][2] : f32 from vector<4xf32>
745//       CHECK:   return %[[R]] : f32
746func.func @fold_extract_broadcast(%a : vector<4xf32>) -> f32 {
747  %b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32>
748  %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
749  return %r : f32
750}
751
752// -----
753
// Scalar source, vector result: extracting a vector<4xf32> slice of the
// broadcast is expected to become a direct broadcast of the scalar to
// vector<4xf32>.
754// CHECK-LABEL: fold_extract_broadcast
755//       CHECK:   %[[B:.*]] = vector.broadcast %{{.*}} : f32 to vector<4xf32>
756//       CHECK:   return %[[B]] : vector<4xf32>
757func.func @fold_extract_broadcast(%a : f32) -> vector<4xf32> {
758  %b = vector.broadcast %a : f32 to vector<1x2x4xf32>
759  %r = vector.extract %b[0, 1] : vector<4xf32> from vector<1x2x4xf32>
760  return %r : vector<4xf32>
761}
762
763// -----
764
// vector<1xf32> source broadcast to vector<1x8xf32>, then row 0 extracted:
// the expected output is a single broadcast from vector<1xf32> to
// vector<8xf32>.
765// CHECK-LABEL: fold_extract_broadcast
766//  CHECK-SAME:   %[[A:.*]]: vector<1xf32>
767//       CHECK:   %[[R:.*]] = vector.broadcast %[[A]] : vector<1xf32> to vector<8xf32>
768//       CHECK:   return %[[R]] : vector<8xf32>
769func.func @fold_extract_broadcast(%a : vector<1xf32>) -> vector<8xf32> {
770  %b = vector.broadcast %a : vector<1xf32> to vector<1x8xf32>
771  %r = vector.extract %b[0] : vector<8xf32> from vector<1x8xf32>
772  return %r : vector<8xf32>
773}
774// -----
775
// Scalar extracts from a shuffle with mask [0, 8, 7, 15]: the expected output
// extracts elements 0 and 7 directly from each shuffle operand and contains
// no vector.shuffle.
776// CHECK-LABEL: @fold_extract_shuffle
777//  CHECK-SAME:   %[[A:.*]]: vector<8xf32>, %[[B:.*]]: vector<8xf32>
778//   CHECK-NOT:   vector.shuffle
779//       CHECK:   vector.extract %[[A]][0] : f32 from vector<8xf32>
780//       CHECK:   vector.extract %[[B]][0] : f32 from vector<8xf32>
781//       CHECK:   vector.extract %[[A]][7] : f32 from vector<8xf32>
782//       CHECK:   vector.extract %[[B]][7] : f32 from vector<8xf32>
783func.func @fold_extract_shuffle(%a : vector<8xf32>, %b : vector<8xf32>)
784                                -> (f32, f32, f32, f32) {
785  %shuffle = vector.shuffle %a, %b [0, 8, 7, 15] : vector<8xf32>, vector<8xf32>
786  %e0 = vector.extract %shuffle[0] : f32 from vector<4xf32>
787  %e1 = vector.extract %shuffle[1] : f32 from vector<4xf32>
788  %e2 = vector.extract %shuffle[2] : f32 from vector<4xf32>
789  %e3 = vector.extract %shuffle[3] : f32 from vector<4xf32>
790  return %e0, %e1, %e2, %e3 : f32, f32, f32, f32
791}
792
793// -----
794
// Extracts from shape_cast results are expected to be rewritten as extracts
// from the shape_cast sources with remapped positions (e.g. [4, 1] of the
// 15x2 view becomes [1, 0, 1, 1] of the 5x1x3x2 source). Extracting [0] from
// the 1x8x4x2 view is expected to yield the second argument itself.
795// CHECK-LABEL: func @fold_extract_shapecast
796//  CHECK-SAME: (%[[A0:.*]]: vector<5x1x3x2xf32>, %[[A1:.*]]: vector<8x4x2xf32>
797//       CHECK:   %[[R0:.*]] = vector.extract %[[A0]][1, 0, 1, 1] : f32 from vector<5x1x3x2xf32>
798//       CHECK:   %[[R1:.*]] = vector.extract %[[A0]][1, 0, 2] : vector<2xf32> from vector<5x1x3x2xf32>
799//       CHECK:   %[[R2:.*]] = vector.extract %[[A1]][7] : vector<4x2xf32> from vector<8x4x2xf32>
800//       CHECK:   return %[[R0]], %[[R1]], %[[R2]], %[[A1]] : f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>
801func.func @fold_extract_shapecast(%arg0 : vector<5x1x3x2xf32>,
802                             %arg1 : vector<8x4x2xf32>)
803  -> (f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>) {
804  %0 = vector.shape_cast %arg0 : vector<5x1x3x2xf32> to vector<15x2xf32>
805  %1 = vector.shape_cast %arg1 : vector<8x4x2xf32> to vector<4x2x4x2xf32>
806  %2 = vector.shape_cast %arg1 : vector<8x4x2xf32> to vector<1x8x4x2xf32>
807  %r1 = vector.extract %0[4, 1] : f32 from vector<15x2xf32>
808  %r2 = vector.extract %0[5] : vector<2xf32> from vector<15x2xf32>
809  %r3 = vector.extract %1[3, 1] : vector<4x2xf32> from vector<4x2x4x2xf32>
810  %r4 = vector.extract %2[0] : vector<8x4x2xf32> from vector<1x8x4x2xf32>
811  return %r1, %r2, %r3, %r4 : f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>
812}
813
814// -----
815
// shape_cast to a 0-D vector followed by an extract with no indices: the
// expected output extracts [0, 0, 0] directly from the 1x1x1 argument.
816// CHECK-LABEL: fold_extract_shapecast_0d_result
817//  CHECK-SAME: %[[IN:.*]]: vector<1x1x1xf32>
818//       CHECK:   %[[R:.*]] = vector.extract %[[IN]][0, 0, 0] : f32 from vector<1x1x1xf32>
819//       CHECK:   return %[[R]] : f32
820func.func @fold_extract_shapecast_0d_result(%arg0 : vector<1x1x1xf32>) -> f32 {
821  %0 = vector.shape_cast %arg0 : vector<1x1x1xf32> to vector<f32>
822  %r = vector.extract %0[] : f32 from vector<f32>
823  return %r : f32
824}
825
826// -----
827
// shape_cast from a 0-D vector to vector<1xf32> followed by extract [0]: the
// expected output extracts with no indices from the 0-D argument.
828// CHECK-LABEL: fold_extract_shapecast_0d_source
829//  CHECK-SAME: %[[IN:.*]]: vector<f32>
830//       CHECK:   %[[R:.*]] = vector.extract %[[IN]][] : f32 from vector<f32>
831//       CHECK:   return %[[R]] : f32
832func.func @fold_extract_shapecast_0d_source(%arg0 : vector<f32>) -> f32 {
833  %0 = vector.shape_cast %arg0 : vector<f32> to vector<1xf32>
834  %r = vector.extract %0[0] : f32 from vector<1xf32>
835  return %r : f32
836}
837
838// -----
839
// Negative case: extracting a vector<4x2xf32> from a shape_cast of a
// vector<16xf32>; the expected output keeps the shape_cast and the extract.
840// CHECK-LABEL: fold_extract_shapecast_negative
841//       CHECK:   %[[V:.*]] = vector.shape_cast %{{.*}} : vector<16xf32> to vector<2x4x2xf32>
842//       CHECK:   %[[R:.*]] = vector.extract %[[V]][1] : vector<4x2xf32> from vector<2x4x2xf32>
843//       CHECK:   return %[[R]] : vector<4x2xf32>
844func.func @fold_extract_shapecast_negative(%arg0 : vector<16xf32>) -> vector<4x2xf32> {
845  %0 = vector.shape_cast %arg0 : vector<16xf32> to vector<2x4x2xf32>
846  %r = vector.extract %0[1] : vector<4x2xf32> from vector<2x4x2xf32>
847  return %r : vector<4x2xf32>
848}
849
850// -----
851
// shape_cast to vector<1x12xf32> followed by extract [0]: the expected output
// is a single shape_cast from vector<3x4xf32> to vector<12xf32>.
852// CHECK-LABEL: fold_extract_shapecast_to_shapecast
853//  CHECK-SAME: (%[[ARG:.+]]: vector<3x4xf32>)
854//       CHECK:   %[[R:.+]] = vector.shape_cast %[[ARG]] : vector<3x4xf32> to vector<12xf32>
855//       CHECK:   return %[[R]]
856func.func @fold_extract_shapecast_to_shapecast(%arg0 : vector<3x4xf32>) -> vector<12xf32> {
857  %0 = vector.shape_cast %arg0 : vector<3x4xf32> to vector<1x12xf32>
858  %r = vector.extract %0[0] : vector<12xf32> from vector<1x12xf32>
859  return %r : vector<12xf32>
860}
861
862// -----
863
// Extracting the scalar from a 0-D vector argument with an empty position:
// the expected output keeps the vector.extract unchanged.
864// CHECK-LABEL: func @extract_no_fold_scalar_to_0d(
865//  CHECK-SAME:     %[[v:.*]]: vector<f32>)
866//       CHECK:   %[[extract:.*]] = vector.extract %[[v]][] : f32 from vector<f32>
867//       CHECK:   return %[[extract]]
868func.func @extract_no_fold_scalar_to_0d(%v: vector<f32>) -> f32 {
869  %0 = vector.extract %v[] : f32 from vector<f32>
870  return %0 : f32
871}
872
873// -----
874
// Inserting a vector<2x2xf32> constant into a destination of the same type
// with an empty position: the expected output returns the constant and has
// no vector.insert.
875// CHECK-LABEL: func @insert_fold_same_rank(
876//  CHECK-SAME:     %[[v:.*]]: vector<2x2xf32>)
877//       CHECK:      %[[CST:.+]] = arith.constant
878//  CHECK-SAME:                    : vector<2x2xf32>
879//       CHECK-NOT:  vector.insert
880//       CHECK:   return %[[CST]]
881func.func @insert_fold_same_rank(%v: vector<2x2xf32>) -> vector<2x2xf32> {
882  %cst = arith.constant dense<0.000000e+00> : vector<2x2xf32>
883  %0 = vector.insert %cst, %v [] : vector<2x2xf32> into vector<2x2xf32>
884  return %0 : vector<2x2xf32>
885}
886
887// -----
888
// Inserting a scalar into a 0-D vector with an empty position: the expected
// output keeps the vector.insert. The pattern variable is named after the op
// it binds (it was previously called "extract", copied from the extract test).
889// CHECK-LABEL: func @insert_no_fold_scalar_to_0d(
890//  CHECK-SAME:     %[[v:.*]]: vector<f32>)
891//       CHECK:   %[[insert:.*]] = vector.insert %{{.*}}, %[[v]] [] : f32 into vector<f32>
892//       CHECK:   return %[[insert]]
893func.func @insert_no_fold_scalar_to_0d(%v: vector<f32>) -> vector<f32> {
894  %cst = arith.constant 0.000000e+00 : f32
895  %0 = vector.insert %cst, %v [] : f32 into vector<f32>
896  return %0 : vector<f32>
897}
898
899// -----
900
// Two chained shape_casts (1x1x64 -> 1x1x8x8 -> 8x8): the expected output
// still contains both shape_cast ops.
901// CHECK-LABEL: dont_fold_expand_collapse
902//       CHECK:   %[[A:.*]] = vector.shape_cast %{{.*}} : vector<1x1x64xf32> to vector<1x1x8x8xf32>
903//       CHECK:   %[[B:.*]] = vector.shape_cast %{{.*}} : vector<1x1x8x8xf32> to vector<8x8xf32>
904//       CHECK:   return %[[B]] : vector<8x8xf32>
905func.func @dont_fold_expand_collapse(%arg0: vector<1x1x64xf32>) -> vector<8x8xf32> {
906    %0 = vector.shape_cast %arg0 : vector<1x1x64xf32> to vector<1x1x8x8xf32>
907    %1 = vector.shape_cast %0 : vector<1x1x8x8xf32> to vector<8x8xf32>
908    return %1 : vector<8x8xf32>
909}
910
911// -----
912
// broadcast to vector<1x1x4xf32> followed by a shape_cast back to the source
// type: the expected output returns the argument directly.
913// CHECK-LABEL: func @fold_broadcast_shapecast
914//  CHECK-SAME: (%[[V:.+]]: vector<4xf32>)
915//       CHECK:   return %[[V]]
916func.func @fold_broadcast_shapecast(%arg0: vector<4xf32>) -> vector<4xf32> {
917    %0 = vector.broadcast %arg0 : vector<4xf32> to vector<1x1x4xf32>
918    %1 = vector.shape_cast %0 : vector<1x1x4xf32> to vector<4xf32>
919    return %1 : vector<4xf32>
920}
921
922// -----
923
// Scalar broadcast to vector<1x1x1xf32> followed by a shape_cast to
// vector<1xf32>: the expected output has a broadcast and no shape_cast.
924// CHECK-LABEL: func @canonicalize_broadcast_shapecast_scalar
925//       CHECK:   vector.broadcast
926//   CHECK-NOT:   vector.shape_cast
927func.func @canonicalize_broadcast_shapecast_scalar(%arg0: f32) -> vector<1xf32> {
928    %0 = vector.broadcast %arg0 : f32 to vector<1x1x1xf32>
929    %1 = vector.shape_cast %0 : vector<1x1x1xf32> to vector<1xf32>
930    return %1 : vector<1xf32>
931}
932
933// -----
934
// Negative case: vector<4xf32> broadcast to 1x2x4 and shape_cast to
// vector<8xf32>; the expected output keeps both the broadcast and shape_cast.
935// CHECK-LABEL: func @dont_fold_broadcast_shapecast_diff_shape
936//       CHECK:   vector.broadcast
937//       CHECK:   vector.shape_cast
938func.func @dont_fold_broadcast_shapecast_diff_shape(%arg0: vector<4xf32>) -> vector<8xf32> {
939    %0 = vector.broadcast %arg0 : vector<4xf32> to vector<1x2x4xf32>
940    %1 = vector.shape_cast %0 : vector<1x2x4xf32> to vector<8xf32>
941    return %1 : vector<8xf32>
942}
943
944// -----
945
// vector<3xf32> broadcast to 2x4x3 and shape_cast to 8x3: the expected output
// has a broadcast and no shape_cast.
946// CHECK-LABEL: func @canonicalize_broadcast_shapecast_to_broadcast
947//       CHECK:   vector.broadcast
948//   CHECK-NOT:   vector.shape_cast
949func.func @canonicalize_broadcast_shapecast_to_broadcast(%arg0: vector<3xf32>) -> vector<8x3xf32> {
950    %0 = vector.broadcast %arg0 : vector<3xf32> to vector<2x4x3xf32>
951    %1 = vector.shape_cast %0 : vector<2x4x3xf32> to vector<8x3xf32>
952    return %1 : vector<8x3xf32>
953}
954
955// -----
956
// vector<3x4xf32> broadcast to 1x1x3x4 and shape_cast to 1x12: the expected
// output has no broadcast and a single shape_cast from 3x4 to 1x12.
957// CHECK-LABEL: func @canonicalize_broadcast_shapecast_to_shapecast
958//   CHECK-NOT:   vector.broadcast
959//       CHECK:   vector.shape_cast {{.+}} : vector<3x4xf32> to vector<1x12xf32>
960func.func @canonicalize_broadcast_shapecast_to_shapecast(%arg0: vector<3x4xf32>) -> vector<1x12xf32> {
961    %0 = vector.broadcast %arg0 : vector<3x4xf32> to vector<1x1x3x4xf32>
962    %1 = vector.shape_cast %0 : vector<1x1x3x4xf32> to vector<1x12xf32>
963    return %1 : vector<1x12xf32>
964}
965
966// -----
967
// Transfer ops whose mask operand is either a constant_mask with sizes equal
// to the mask shape or a dense<true> constant: the expected output prints each
// transfer_read/transfer_write without a mask operand (the attribute
// dictionary or the type list follows the padding value / indices directly).
968// CHECK-LABEL: fold_vector_transfer_masks
969func.func @fold_vector_transfer_masks(%A: memref<?x?xf32>) -> (vector<4x8xf32>, vector<4x[4]xf32>) {
970  // CHECK: %[[C0:.+]] = arith.constant 0 : index
971  %c0 = arith.constant 0 : index
972  // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
973  %f0 = arith.constant 0.0 : f32
974
975  %mask = vector.constant_mask [8, 4] : vector<8x4xi1>
976
977  %arith_all_true_mask = arith.constant dense<true> : vector<4x[4]xi1>
978
979  // CHECK: vector.transfer_read %{{.*}}, %[[F0]] {permutation_map
980  %1 = vector.transfer_read %A[%c0, %c0], %f0, %mask
981      {permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : memref<?x?xf32>, vector<4x8xf32>
982
983  // CHECK: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] {permutation_map
984  vector.transfer_write %1, %A[%c0, %c0], %mask
985      {permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<4x8xf32>, memref<?x?xf32>
986
987  // CHECK: vector.transfer_read %{{.*}}, %[[F0]] :
988  %2 = vector.transfer_read %A[%c0, %c0], %f0, %arith_all_true_mask : memref<?x?xf32>, vector<4x[4]xf32>
989
990  // CHECK: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] :
991  vector.transfer_write %2, %A[%c0, %c0], %arith_all_true_mask : vector<4x[4]xf32>, memref<?x?xf32>
992
993  // CHECK: return
994  return %1, %2 : vector<4x8xf32>, vector<4x9xf32>
995}
996
997// -----
998
// Transfers on memref<?x8xf32> starting at index 0: for the 4x8 vector the
// expected output carries in_bounds = [false, true]; for the 4x9 vector no
// in_bounds attribute is expected.
999// CHECK-LABEL: fold_vector_transfers
1000func.func @fold_vector_transfers(%A: memref<?x8xf32>) -> (vector<4x8xf32>, vector<4x9xf32>) {
1001  %c0 = arith.constant 0 : index
1002  %f0 = arith.constant 0.0 : f32
1003
1004  // CHECK: vector.transfer_read %{{.*}} {in_bounds = [false, true]}
1005  %1 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x8xf32>, vector<4x8xf32>
1006
1007  // CHECK: vector.transfer_write %{{.*}} {in_bounds = [false, true]}
1008  vector.transfer_write %1, %A[%c0, %c0] : vector<4x8xf32>, memref<?x8xf32>
1009
1010  // Both dims may be out-of-bounds, attribute is elided.
1011  // CHECK: vector.transfer_read %{{.*}}
1012  // CHECK-NOT: in_bounds
1013  %2 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x8xf32>, vector<4x9xf32>
1014
1015  // Both dims may be out-of-bounds, attribute is elided.
1016  // CHECK: vector.transfer_write %{{.*}}
1017  // CHECK-NOT: in_bounds
1018  vector.transfer_write %2, %A[%c0, %c0] : vector<4x9xf32>, memref<?x8xf32>
1019
1020  // CHECK: return
1021  return %1, %2 : vector<4x8xf32>, vector<4x9xf32>
1022}
1023
1024// -----
1025
// A bitcast to the same type and a bitcast round trip (2xi32 -> 4xi16 ->
// 2xi32): the expected output returns both arguments directly.
1026// CHECK-LABEL: bitcast_folding
1027//  CHECK-SAME:   %[[A:.*]]: vector<4x8xf32>
1028//  CHECK-SAME:   %[[B:.*]]: vector<2xi32>
1029//  CHECK:        return %[[A]], %[[B]] : vector<4x8xf32>, vector<2xi32>
1030func.func @bitcast_folding(%I1: vector<4x8xf32>, %I2: vector<2xi32>) -> (vector<4x8xf32>, vector<2xi32>) {
1031  %0 = vector.bitcast %I1 : vector<4x8xf32> to vector<4x8xf32>
1032  %1 = vector.bitcast %I2 : vector<2xi32> to vector<4xi16>
1033  %2 = vector.bitcast %1 : vector<4xi16> to vector<2xi32>
1034  return %0, %2 : vector<4x8xf32>, vector<2xi32>
1035}
1036
// Bitcasts of splat f16 constants to vector<4xf32>: the expected output is a
// pair of f32 splat constants whose values correspond to two concatenated f16
// bit patterns (0x4000 twice gives 0x40004000, printed as 2.00390625).
1037// CHECK-LABEL: func @bitcast_f16_to_f32
1038//              bit pattern: 0x40004000
1039//       CHECK-DAG: %[[CST1:.+]] = arith.constant dense<2.00390625> : vector<4xf32>
1040//              bit pattern: 0x00000000
1041//       CHECK-DAG: %[[CST0:.+]] = arith.constant dense<0.000000e+00> : vector<4xf32>
1042//       CHECK: return %[[CST0]], %[[CST1]]
1043func.func @bitcast_f16_to_f32() -> (vector<4xf32>, vector<4xf32>) {
1044  %cst0 = arith.constant dense<0.0> : vector<8xf16> // bit pattern: 0x0000
1045  %cst1 = arith.constant dense<2.0> : vector<8xf16> // bit pattern: 0x4000
1046  %cast0 = vector.bitcast %cst0: vector<8xf16> to vector<4xf32>
1047  %cast1 = vector.bitcast %cst1: vector<8xf16> to vector<4xf32>
1048  return %cast0, %cast1: vector<4xf32>, vector<4xf32>
1049}
1050
// Bitcasts of splat i8 constants to vector<4xi32>: the expected output is a
// pair of i32 splat constants (four 0xA0 bytes give 0xA0A0A0A0, printed as the
// signed value -1600085856).
1051// CHECK-LABEL: func @bitcast_i8_to_i32
1052//              bit pattern: 0xA0A0A0A0
1053//       CHECK-DAG: %[[CST1:.+]] = arith.constant dense<-1600085856> : vector<4xi32>
1054//              bit pattern: 0x00000000
1055//       CHECK-DAG: %[[CST0:.+]] = arith.constant dense<0> : vector<4xi32>
1056//       CHECK: return %[[CST0]], %[[CST1]]
1057func.func @bitcast_i8_to_i32() -> (vector<4xi32>, vector<4xi32>) {
1058  %cst0 = arith.constant dense<0> : vector<16xi8> // bit pattern: 0x00
1059  %cst1 = arith.constant dense<160> : vector<16xi8> // bit pattern: 0xA0
1060  %cast0 = vector.bitcast %cst0: vector<16xi8> to vector<4xi32>
1061  %cast1 = vector.bitcast %cst1: vector<16xi8> to vector<4xi32>
1062  return %cast0, %cast1: vector<4xi32>, vector<4xi32>
1063}
1064
1065// -----
1066
// Broadcast of a scalar constant: the expected output is a dense<42>
// vector<4xi32> constant with no vector.broadcast.
1067// CHECK-LABEL: broadcast_folding1
1068//       CHECK: %[[CST:.*]] = arith.constant dense<42> : vector<4xi32>
1069//   CHECK-NOT: vector.broadcast
1070//       CHECK: return %[[CST]]
1071func.func @broadcast_folding1() -> vector<4xi32> {
1072  %0 = arith.constant 42 : i32
1073  %1 = vector.broadcast %0 : i32 to vector<4xi32>
1074  return %1 : vector<4xi32>
1075}
1076
1077// -----
1078
// Two chained broadcasts of a scalar constant: the expected output is a
// single dense<42> vector<4x16xi32> constant with no vector.broadcast.
1079// CHECK-LABEL: @broadcast_folding2
1080//       CHECK: %[[CST:.*]] = arith.constant dense<42> : vector<4x16xi32>
1081//   CHECK-NOT: vector.broadcast
1082//       CHECK: return %[[CST]]
1083func.func @broadcast_folding2() -> vector<4x16xi32> {
1084  %0 = arith.constant 42 : i32
1085  %1 = vector.broadcast %0 : i32 to vector<16xi32>
1086  %2 = vector.broadcast %1 : vector<16xi32> to vector<4x16xi32>
1087  return %2 : vector<4x16xi32>
1088}
1089
1090// -----
1091
// Two chained broadcasts of a non-constant scalar: the expected output is a
// single broadcast from i32 to vector<4x16xi32>.
1092// CHECK-LABEL: @fold_consecutive_broadcasts(
1093//  CHECK-SAME:                              %[[ARG0:.*]]: i32
1094//       CHECK: %[[RESULT:.*]] = vector.broadcast %[[ARG0]] : i32 to vector<4x16xi32>
1095//       CHECK: return %[[RESULT]]
1096func.func @fold_consecutive_broadcasts(%a : i32) -> vector<4x16xi32> {
1097  %1 = vector.broadcast %a : i32 to vector<16xi32>
1098  %2 = vector.broadcast %1 : vector<16xi32> to vector<4x16xi32>
1099  return %2 : vector<4x16xi32>
1100}
1101
1102// -----
1103
// shape_cast of splat constants (one f32, one i32): the expected output is a
// pair of splat constants already in the result shapes.
1104// CHECK-LABEL: shape_cast_constant
1105//       CHECK-DAG: %[[CST1:.*]] = arith.constant dense<1> : vector<3x4x2xi32>
1106//       CHECK-DAG: %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<20x2xf32>
1107//       CHECK: return %[[CST0]], %[[CST1]] : vector<20x2xf32>, vector<3x4x2xi32>
1108func.func @shape_cast_constant() -> (vector<20x2xf32>, vector<3x4x2xi32>) {
1109  %cst = arith.constant dense<2.000000e+00> : vector<5x4x2xf32>
1110  %cst_1 = arith.constant dense<1> : vector<12x2xi32>
1111  %0 = vector.shape_cast %cst : vector<5x4x2xf32> to vector<20x2xf32>
1112  %1 = vector.shape_cast %cst_1 : vector<12x2xi32> to vector<3x4x2xi32>
1113  return %0, %1 : vector<20x2xf32>, vector<3x4x2xi32>
1114}
1115
1116// -----
1117
// extract_strided_slice of splat constants: the expected output is a pair of
// splat constants with the slice shapes (12x2 and 2x13x3).
1118// CHECK-LABEL: extract_strided_constant
1119//       CHECK-DAG: %[[CST1:.*]] = arith.constant dense<1> : vector<2x13x3xi32>
1120//       CHECK-DAG: %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<12x2xf32>
1121//       CHECK: return %[[CST0]], %[[CST1]] : vector<12x2xf32>, vector<2x13x3xi32>
1122func.func @extract_strided_constant() -> (vector<12x2xf32>, vector<2x13x3xi32>) {
1123  %cst = arith.constant dense<2.000000e+00> : vector<29x7xf32>
1124  %cst_1 = arith.constant dense<1> : vector<4x37x9xi32>
1125  %0 = vector.extract_strided_slice %cst
1126    {offsets = [2, 3], sizes = [12, 2], strides = [1, 1]}
1127      : vector<29x7xf32> to vector<12x2xf32>
1128  %1 = vector.extract_strided_slice %cst_1
1129    {offsets = [1, 2, 5], sizes = [2, 13, 3], strides = [1, 1, 1]}
1130      : vector<4x37x9xi32> to vector<2x13x3xi32>
1131  return %0, %1 : vector<12x2xf32>, vector<2x13x3xi32>
1132}
1133
1134// -----
1135
// Slice of a broadcast that keeps the full inner dimension (sizes [2, 4] of
// 16x4): the expected output is a single broadcast from vector<4xf16> to
// vector<2x4xf16>.
1136// CHECK-LABEL: extract_strided_broadcast
1137//       CHECK:   %[[B:.*]] = vector.broadcast %{{.*}} : vector<4xf16> to vector<2x4xf16>
1138//  CHECK-NEXT:   return %[[B]] : vector<2x4xf16>
1139func.func @extract_strided_broadcast(%arg0: vector<4xf16>) -> vector<2x4xf16> {
1140 %0 = vector.broadcast %arg0 : vector<4xf16> to vector<16x4xf16>
1141 %1 = vector.extract_strided_slice %0
1142  {offsets = [0, 0], sizes = [2, 4], strides = [1, 1]} :
1143  vector<16x4xf16> to vector<2x4xf16>
1144  return %1 : vector<2x4xf16>
1145}
1146
1147// -----
1148
// Slice of a broadcast that also narrows the inner dimension (sizes [2, 2] of
// 16x4): the expected output first slices the vector<4xf16> source to
// vector<2xf16> and then broadcasts it to vector<2x2xf16>.
1149// CHECK-LABEL: extract_strided_broadcast2
1150//       CHECK:   %[[E:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0], sizes = [2], strides = [1]} : vector<4xf16> to vector<2xf16>
1151//  CHECK-NEXT:   %[[B:.*]] = vector.broadcast %[[E]] : vector<2xf16> to vector<2x2xf16>
1152//  CHECK-NEXT:   return %[[B]] : vector<2x2xf16>
1153func.func @extract_strided_broadcast2(%arg0: vector<4xf16>) -> vector<2x2xf16> {
1154 %0 = vector.broadcast %arg0 : vector<4xf16> to vector<16x4xf16>
1155 %1 = vector.extract_strided_slice %0
1156  {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} :
1157  vector<16x4xf16> to vector<2x2xf16>
1158  return %1 : vector<2x2xf16>
1159}
1160
1161// -----
1162
// Slice (offset [0, 4], sizes [1, 4]) of a vector<1xf32> broadcast to 1x8:
// the expected output is a single broadcast from vector<1xf32> to
// vector<1x4xf32>.
1163// CHECK-LABEL: func @extract_strided_broadcast3
1164//  CHECK-SAME: (%[[ARG:.+]]: vector<1xf32>)
1165//       CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : vector<1xf32> to vector<1x4xf32>
1166//       CHECK: return %[[V]]
1167func.func @extract_strided_broadcast3(%arg0: vector<1xf32>) -> vector<1x4xf32> {
1168 %0 = vector.broadcast %arg0 : vector<1xf32> to vector<1x8xf32>
1169 %1 = vector.extract_strided_slice %0
1170      {offsets = [0, 4], sizes = [1, 4], strides = [1, 1]}
1171      : vector<1x8xf32> to vector<1x4xf32>
1172  return %1 : vector<1x4xf32>
1173}
1174
1175// -----
1176
// Slice (offset [0, 4], sizes [1, 4]) of a scalar broadcast to 1x8: the
// expected output is a single broadcast from f32 to vector<1x4xf32>.
1177// CHECK-LABEL: func @extract_strided_broadcast4
1178//  CHECK-SAME: (%[[ARG:.+]]: f32)
1179//       CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : f32 to vector<1x4xf32>
1180//       CHECK: return %[[V]]
1181func.func @extract_strided_broadcast4(%arg0: f32) -> vector<1x4xf32> {
1182 %0 = vector.broadcast %arg0 : f32 to vector<1x8xf32>
1183 %1 = vector.extract_strided_slice %0
1184      {offsets = [0, 4], sizes = [1, 4], strides = [1, 1]}
1185      : vector<1x8xf32> to vector<1x4xf32>
1186  return %1 : vector<1x4xf32>
1187}
1188
1189// -----
1190
// Two chained shape_casts (16 -> 2x8 -> 4x4): the expected output is a single
// shape_cast from vector<16xf16> to vector<4x4xf16>.
1191// CHECK-LABEL: consecutive_shape_cast
1192//       CHECK:   %[[C:.*]] = vector.shape_cast %{{.*}} : vector<16xf16> to vector<4x4xf16>
1193//  CHECK-NEXT:   return %[[C]] : vector<4x4xf16>
1194func.func @consecutive_shape_cast(%arg0: vector<16xf16>) -> vector<4x4xf16> {
1195  %0 = vector.shape_cast %arg0 : vector<16xf16> to vector<2x8xf16>
1196  %1 = vector.shape_cast %0 : vector<2x8xf16> to vector<4x4xf16>
1197  return %1 : vector<4x4xf16>
1198}
1199
1200// -----
1201
// A transfer_read from a memref and a transfer_write into a tensor whose
// results are never used: the expected output contains neither op.
1202// CHECK-LABEL: func @dead_transfer_op
1203//   CHECK-NOT:   vector.transfer_read
1204//   CHECK-NOT:   vector.transfer_write
1205//       CHECK:   return
1206func.func @dead_transfer_op(%arg0 : tensor<4x4xf32>, %arg1 : memref<4x4xf32>,
1207                       %v0 : vector<1x4xf32>) {
1208  %c0 = arith.constant 0 : index
1209  %cf0 = arith.constant 0.0 : f32
1210  %r = vector.transfer_read %arg1[%c0, %c0], %cf0 :
1211    memref<4x4xf32>, vector<1x4xf32>
1212  %w = vector.transfer_write %v0, %arg0[%c0, %c0] :
1213    vector<1x4xf32>, tensor<4x4xf32>
1214  return
1215}
1216
1217// -----
1218
// maskedload, gather and expandload whose results are never used: the
// expected output contains none of the three ops.
1219// CHECK-LABEL: func @dead_load
1220//   CHECK-NOT:   vector.maskedload
1221//   CHECK-NOT:   vector.gather
1222//   CHECK-NOT:   vector.expandload
1223//       CHECK:   return
1224func.func @dead_load(%base: memref<?xf32>, %indices: vector<16xi32>,
1225                          %mask: vector<16xi1>, %passthru: vector<16xf32>) {
1226  %c0 = arith.constant 0 : index
1227  %0 = vector.maskedload %base[%c0], %mask, %passthru :
1228    memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1229  %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
1230    memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1231  %2 = vector.expandload %base[%c0], %mask, %passthru :
1232    memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1233  return
1234}
1235
1236// -----
1237
// Indexing maps and iterator types shared by the contractions in the test
// below: (i, k) x (k, j) -> (i, j) with k as the reduction dimension.
1238#contraction_accesses0 = [
1239  affine_map<(i, j, k) -> (i, k)>,
1240  affine_map<(i, j, k) -> (k, j)>,
1241  affine_map<(i, j, k) -> (i, j)>
1242]
1243#contraction_trait0 = {
1244  indexing_maps = #contraction_accesses0,
1245  iterator_types = ["parallel", "parallel", "reduction"]
1246}
1247
// A contract with a zero accumulator followed by an add (f32 via arith.addf,
// i8 via arith.addi): the expected output is a contract that takes the added
// value as its accumulator, with no zero constant and no add remaining.
1248// CHECK-LABEL: func @contractions
1249//  CHECK-SAME:   %[[A:[0-9a-zA-Z]+]]: vector<2x3xf32>
1250//  CHECK-SAME:   %[[B:[0-9a-zA-Z]+]]: vector<3x4xf32>
1251//  CHECK-SAME:   %[[C:[0-9a-zA-Z]+]]: vector<2x4xf32>
1252//  CHECK-SAME:   %[[A_I8:[0-9a-zA-Z]+]]: vector<2x3xi8>
1253//  CHECK-SAME:   %[[B_I8:[0-9a-zA-Z]+]]: vector<3x4xi8>
1254//  CHECK-SAME:   %[[C_I8:[0-9a-zA-Z]+]]: vector<2x4xi8>
1255func.func @contractions(%a: vector<2x3xf32>, %b: vector<3x4xf32>, %c: vector<2x4xf32>,
1256                   %a_i8: vector<2x3xi8>, %b_i8: vector<3x4xi8>, %c_i8: vector<2x4xi8>)
1257  -> (vector<2x4xf32>, vector<2x4xi8>)
1258{
1259  // CHECK-NOT: arith.constant
1260  %vf_0 = arith.constant dense <0.0>: vector<2x4xf32>
1261  // CHECK-NOT: arith.addf
1262  //     CHECK: %[[D:.*]] = vector.contract {{.*}} %[[A]], %[[B]], %[[C]]
1263  %0 = vector.contract #contraction_trait0 %a, %b, %vf_0:
1264    vector<2x3xf32>, vector<3x4xf32> into vector<2x4xf32>
1265  // CHECK-NOT: arith.addf
1266  %1 = arith.addf %0, %c: vector<2x4xf32>
1267
1268  // CHECK-NOT: arith.constant
1269  %vi8_0 = arith.constant dense <0>: vector<2x4xi8>
1270  // CHECK-NOT: arith.addi
1271  //     CHECK: %[[D_I8:.*]] = vector.contract {{.*}} %[[A_I8]], %[[B_I8]], %[[C_I8]]
1272  %i8_0 = vector.contract #contraction_trait0 %a_i8, %b_i8, %vi8_0:
1273    vector<2x3xi8>, vector<3x4xi8> into vector<2x4xi8>
1274  // CHECK-NOT: arith.addi
1275  %i8_1 = arith.addi %i8_0, %c_i8: vector<2x4xi8>
1276
1277  // CHECK: return %[[D]], %[[D_I8]]
1278  return %1, %i8_1: vector<2x4xf32>, vector<2x4xi8>
1279}
1280
1281// -----
1282
// A full-tensor in-bounds read of %t0 written back in full into three
// different destinations (%t1, an unregistered "test.constant" result, and
// %t0 itself): the expected output returns %t0 for all three results, and the
// only op expected before the return is the opaque
// "some_op_that_may_have_side_effects".
1283// CHECK-LABEL: func @transfer_folding_1
1284//  CHECK-SAME:   %[[T0:[0-9a-zA-Z]+]]: tensor<2x3x4xf32>
1285//  CHECK-SAME:   %[[T1:[0-9a-zA-Z]+]]: tensor<2x3x4xf32>
1286func.func @transfer_folding_1(%t0: tensor<2x3x4xf32>, %t1: tensor<2x3x4xf32>)
1287  -> (tensor<2x3x4xf32>, tensor<2x3x4xf32>, tensor<2x3x4xf32>)
1288{
1289  %c0 = arith.constant 0 : index
1290  %pad = arith.constant 0.0 : f32
1291  %v = vector.transfer_read %t0[%c0, %c0, %c0], %pad {in_bounds = [true, true, true]} :
1292    tensor<2x3x4xf32>, vector<2x3x4xf32>
1293
1294  %r0 = vector.transfer_write %v, %t1[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
1295    vector<2x3x4xf32>, tensor<2x3x4xf32>
1296
1297  %t2 = "test.constant"() { value = dense<6.0> : tensor<2x3x4xf32>} : () -> (tensor<2x3x4xf32>)
1298  %r1 = vector.transfer_write %v, %t2[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
1299    vector<2x3x4xf32>, tensor<2x3x4xf32>
1300
1301
1302  // CHECK-NEXT: some_op_that_may_have_side_effects
1303  %t3 = "some_op_that_may_have_side_effects"() : () -> (tensor<2x3x4xf32>)
1304  %r2 = vector.transfer_write %v, %t0[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
1305    vector<2x3x4xf32>, tensor<2x3x4xf32>
1306
1307  // CHECK-NEXT: return %[[T0]], %[[T0]], %[[T0]]
1308  return %r0, %r1, %r2: tensor<2x3x4xf32>, tensor<2x3x4xf32>, tensor<2x3x4xf32>
1309}
1310
1311// -----
1312
// A vector read from a tensor and written back to the same tensor at the same
// indices: the expected output returns the tensor argument with no transfer
// ops.
1313// CHECK-LABEL: func @store_after_load_tensor
1314//  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>)
1315//   CHECK-NOT:   vector.transfer_read
1316//   CHECK-NOT:   vector.transfer_write
1317//       CHECK:   return %[[ARG]] : tensor<4x4xf32>
1318func.func @store_after_load_tensor(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
1319  %c1 = arith.constant 1 : index
1320  %c0 = arith.constant 0 : index
1321  %cf0 = arith.constant 0.0 : f32
1322  %0 = vector.transfer_read %arg0[%c1, %c0], %cf0 :
1323    tensor<4x4xf32>, vector<1x4xf32>
1324  %w0 = vector.transfer_write %0, %arg0[%c1, %c0] :
1325    vector<1x4xf32>, tensor<4x4xf32>
1326  return %w0 : tensor<4x4xf32>
1327}
1328
1329// -----
1330
// Negative case: the read is at [%c1, %c0] but the write-back is at
// [%c0, %c0]; the expected output keeps both transfer ops.
1331// CHECK-LABEL: func @store_after_load_tensor_negative
1332//       CHECK:   vector.transfer_read
1333//       CHECK:   vector.transfer_write
1334//       CHECK:   return
1335func.func @store_after_load_tensor_negative(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
1336  %c1 = arith.constant 1 : index
1337  %c0 = arith.constant 0 : index
1338  %cf0 = arith.constant 0.0 : f32
1339  %0 = vector.transfer_read %arg0[%c1, %c0], %cf0 :
1340    tensor<4x4xf32>, vector<1x4xf32>
1341  %w0 = vector.transfer_write %0, %arg0[%c0, %c0] :
1342    vector<1x4xf32>, tensor<4x4xf32>
1343  return %w0 : tensor<4x4xf32>
1344}
1345
1346// -----
1347
// A read at [%c1, %c0] that follows a write of %v0 at the same indices and an
// intervening write of %v1 at [%c2, %c0]: the expected output returns %v0
// with no transfer ops.
1348// CHECK-LABEL: func @store_to_load_tensor
1349//  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>, %[[V0:.*]]: vector<1x4xf32>, %[[V1:.*]]: vector<1x4xf32>)
1350//   CHECK-NOT:   vector.transfer_write
1351//   CHECK-NOT:   vector.transfer_read
1352//       CHECK:   return %[[V0]] : vector<1x4xf32>
1353func.func @store_to_load_tensor(%arg0 : tensor<4x4xf32>,
1354  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>) -> vector<1x4xf32> {
1355  %c1 = arith.constant 1 : index
1356  %c2 = arith.constant 2 : index
1357  %c0 = arith.constant 0 : index
1358  %cf0 = arith.constant 0.0 : f32
1359  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
1360    vector<1x4xf32>, tensor<4x4xf32>
1361  %w1 = vector.transfer_write %v1, %w0[%c2, %c0] {in_bounds = [true, true]} :
1362    vector<1x4xf32>, tensor<4x4xf32>
1363  %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
1364    tensor<4x4xf32>, vector<1x4xf32>
1365  return %0 : vector<1x4xf32>
1366}
1367
1368// -----
1369
// Negative case: the intervening write uses the non-constant index %i for
// both dimensions; the expected output keeps both writes and the read.
1370// CHECK-LABEL: func @store_to_load_negative_tensor
1371//       CHECK:   vector.transfer_write
1372//       CHECK:   vector.transfer_write
1373//       CHECK:   %[[V:.*]] = vector.transfer_read
1374//       CHECK:   return %[[V]] : vector<1x4xf32>
1375func.func @store_to_load_negative_tensor(%arg0 : tensor<4x4xf32>,
1376  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> vector<1x4xf32> {
1377  %c1 = arith.constant 1 : index
1378  %c2 = arith.constant 2 : index
1379  %c0 = arith.constant 0 : index
1380  %cf0 = arith.constant 0.0 : f32
1381  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
1382    vector<1x4xf32>, tensor<4x4xf32>
1383  %w1 = vector.transfer_write %v0, %w0[%i, %i] {in_bounds = [true, true]} :
1384    vector<1x4xf32>, tensor<4x4xf32>
1385  %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
1386    tensor<4x4xf32>, vector<1x4xf32>
1387  return %0 : vector<1x4xf32>
1388}
1389
1390// -----
1391
// A write of a vector<4x2xf32> followed by a read at the same indices whose
// permutation map has a broadcast (constant 0) result: the expected output is
// a broadcast of %v0 to 6x4x2 followed by a transpose with permutation
// [1, 2, 0].
1392// CHECK-LABEL: func @store_to_load_tensor_broadcast
1393//  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>, %[[V0:.*]]: vector<4x2xf32>)
1394//       CHECK:   %[[B:.*]] = vector.broadcast %[[V0]] : vector<4x2xf32> to vector<6x4x2xf32>
1395//       CHECK:   %[[T:.*]] = vector.transpose %[[B]], [1, 2, 0] : vector<6x4x2xf32> to vector<4x2x6xf32>
1396//       CHECK:   return %[[T]] : vector<4x2x6xf32>
1397func.func @store_to_load_tensor_broadcast(%arg0 : tensor<4x4xf32>,
1398  %v0 : vector<4x2xf32>) -> vector<4x2x6xf32> {
1399  %c0 = arith.constant 0 : index
1400  %cf0 = arith.constant 0.0 : f32
1401  %w0 = vector.transfer_write %v0, %arg0[%c0, %c0] {in_bounds = [true, true]} :
1402    vector<4x2xf32>, tensor<4x4xf32>
1403  %0 = vector.transfer_read %w0[%c0, %c0], %cf0 {in_bounds = [true, true, true],
1404  permutation_map = affine_map<(d0, d1) -> (d0, d1, 0)>} :
1405    tensor<4x4xf32>, vector<4x2x6xf32>
1406  return %0 : vector<4x2x6xf32>
1407}
1408
1409// -----
1410
// Scalable-vector variant: a write of vector<[4]xf32> followed by a read with
// permutation map (d0) -> (0, d0); the expected output is a single broadcast
// of %v0 to vector<6x[4]xf32> with no transpose checked.
1411// CHECK-LABEL: func @store_to_load_tensor_broadcast_scalable
1412//  CHECK-SAME: (%[[ARG:.*]]: tensor<?xf32>, %[[V0:.*]]: vector<[4]xf32>)
1413//       CHECK:   %[[B:.*]] = vector.broadcast %[[V0]] : vector<[4]xf32> to vector<6x[4]xf32>
1414//       CHECK:   return %[[B]] : vector<6x[4]xf32>
1415func.func @store_to_load_tensor_broadcast_scalable(%arg0 : tensor<?xf32>,
1416  %v0 : vector<[4]xf32>) -> vector<6x[4]xf32> {
1417  %c0 = arith.constant 0 : index
1418  %cf0 = arith.constant 0.0 : f32
1419  %w0 = vector.transfer_write %v0, %arg0[%c0] {in_bounds = [true]} :
1420    vector<[4]xf32>, tensor<?xf32>
1421  %0 = vector.transfer_read %w0[%c0], %cf0 {in_bounds = [true, true],
1422  permutation_map = affine_map<(d0) -> (0, d0)>} :
1423    tensor<?xf32>, vector<6x[4]xf32>
1424  return %0 : vector<6x[4]xf32>
1425}
1426
1427// -----
1428
// A write with permutation map (d2, d1) followed by a read with permutation
// map (d1, 0, d2, 0): the expected output is a broadcast of %v0 to
// 100x5x4x1 followed by a transpose with permutation [3, 0, 2, 1].
1429// CHECK-LABEL: func @store_to_load_tensor_perm_broadcast
1430//  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4x4xf32>, %[[V0:.*]]: vector<4x1xf32>)
1431//       CHECK:   %[[B:.*]] = vector.broadcast %[[V0]] : vector<4x1xf32> to vector<100x5x4x1xf32>
1432//       CHECK:   %[[T:.*]] = vector.transpose %[[B]], [3, 0, 2, 1] : vector<100x5x4x1xf32> to vector<1x100x4x5xf32>
1433//       CHECK:   return %[[T]] : vector<1x100x4x5xf32>
1434func.func @store_to_load_tensor_perm_broadcast(%arg0 : tensor<4x4x4xf32>,
1435  %v0 : vector<4x1xf32>) -> vector<1x100x4x5xf32> {
1436  %c0 = arith.constant 0 : index
1437  %cf0 = arith.constant 0.0 : f32
1438  %w0 = vector.transfer_write %v0, %arg0[%c0, %c0, %c0] {in_bounds = [true, true],
1439  permutation_map = affine_map<(d0, d1, d2) -> (d2, d1)>} :
1440    vector<4x1xf32>, tensor<4x4x4xf32>
1441  %0 = vector.transfer_read %w0[%c0, %c0, %c0], %cf0 {in_bounds = [true, true, true, true],
1442  permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} :
1443    tensor<4x4x4xf32>, vector<1x100x4x5xf32>
1444  return %0 : vector<1x100x4x5xf32>
1445}
1446
1447// -----
1448
1449
// Three chained tensor writes where the first and third both target
// [%c1, %c0]: the expected output has only the write at [%c2, %c0] followed by
// the final write at [%c1, %c0], whose result is returned.
1450// CHECK-LABEL: func @dead_store_tensor
1451//   CHECK-DAG:      %[[C0:.*]] = arith.constant 0 : index
1452//   CHECK-DAG:      %[[C1:.*]] = arith.constant 1 : index
1453//   CHECK-DAG:      %[[C2:.*]] = arith.constant 2 : index
1454//   CHECK-NOT:   vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]
1455//       CHECK:   vector.transfer_write {{.*}}, {{.*}}[%[[C2]], %[[C0]]
1456//       CHECK:   %[[VTW:.*]] = vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]
1457//       CHECK:   return %[[VTW]] : tensor<4x4xf32>
1458func.func @dead_store_tensor(%arg0 : tensor<4x4xf32>,
1459  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> tensor<4x4xf32> {
1460  %c1 = arith.constant 1 : index
1461  %c2 = arith.constant 2 : index
1462  %c0 = arith.constant 0 : index
1463  %cf0 = arith.constant 0.0 : f32
1464  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
1465    vector<1x4xf32>, tensor<4x4xf32>
1466  %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = [true, true]} :
1467    vector<1x4xf32>, tensor<4x4xf32>
1468  %w2 = vector.transfer_write %v1, %w1[%c1, %c0] {in_bounds = [true, true]} :
1469    vector<1x4xf32>, tensor<4x4xf32>
1470  return %w2 : tensor<4x4xf32>
1471}
1472
1473// -----
1474
// Negative counterpart of the dead-store test: no write may be removed here.
// Input: %w0 writes at [1, 0]; %w1 writes at [2, 0] on top of %w0; a
// transfer_read at the dynamic position [%i, %i] reads %w1; the final write
// %w2 stores the computed value at [1, 0] on top of %w0 (not %w1).
// Expected output: both earlier writes, the read, and the final write at
// [1, 0] are all still present, and the final write is returned.
1475// CHECK-LABEL: func @dead_store_tensor_negative
1476//   CHECK-DAG:      %[[C0:.*]] = arith.constant 0 : index
1477//   CHECK-DAG:      %[[C1:.*]] = arith.constant 1 : index
1478//       CHECK:   vector.transfer_write
1479//       CHECK:   vector.transfer_write
1480//       CHECK:   vector.transfer_read
1481//       CHECK:   %[[VTW:.*]] = vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]]
1482//       CHECK:   return %[[VTW]] : tensor<4x4xf32>
1483func.func @dead_store_tensor_negative(%arg0 : tensor<4x4xf32>,
1484  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> tensor<4x4xf32> {
1485  %c1 = arith.constant 1 : index
1486  %c2 = arith.constant 2 : index
1487  %c0 = arith.constant 0 : index
1488  %cf0 = arith.constant 0.0 : f32
1489  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
1490    vector<1x4xf32>, tensor<4x4xf32>
1491  %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = [true, true]} :
1492    vector<1x4xf32>, tensor<4x4xf32>
1493  %0 = vector.transfer_read %w1[%i, %i], %cf0 {in_bounds = [true, true]} :
1494    tensor<4x4xf32>, vector<1x4xf32>
1495  %x = arith.addf %0, %0 : vector<1x4xf32>
1496  %w2 = vector.transfer_write %x, %w0[%c1, %c0] {in_bounds = [true, true]} :
1497    vector<1x4xf32>, tensor<4x4xf32>
1498  return %w2 : tensor<4x4xf32>
1499}
1500
1501// -----
1502
// Swapping of a transfer_write with the extract_slice/insert_slice pair that
// consumes it.
// Input: a transposing transfer_write into %arg1, an extract_slice of the
// written tensor with dynamic size %sz, and an insert_slice of that slice into
// %arg2 at [%iv, 16].
// Expected output: the slice is first extracted from %arg2 at [%iv, 16] with
// sizes [%sz, 8], the vector is written directly into that slice (in_bounds
// becomes [true, false] and the transposing map is kept), and the result is
// inserted back into %arg2 at the same position.
1503//       CHECK: #[[$MAP:[0-9a-z]+]] = affine_map<(d0, d1) -> (d1, d0)>
1504
1505// CHECK-LABEL: func @swap_extract_slice_transfer_write
1506//  CHECK-SAME:   %[[VEC:.*]]: vector<8x4xf32>
1507//  CHECK-SAME:   %[[INIT_TENSOR:.*]]: tensor<4x8xf32>,
1508//  CHECK-SAME:   %[[ITER_ARG:.*]]: tensor<64x64xf32>,
1509//  CHECK-SAME:   %[[IV:.*]]: index, %[[SZ:.*]]: index)
1510func.func @swap_extract_slice_transfer_write(%arg0 : vector<8x4xf32>,
1511                                             %arg1 : tensor<4x8xf32>,
1512                                             %arg2 : tensor<64x64xf32>,
1513                                             %iv : index, %sz : index) -> tensor<64x64xf32> {
1514  //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
1515  %c0 = arith.constant 0 : index
1516
1517  //       CHECK:   %[[T0:.*]] = tensor.extract_slice %[[ITER_ARG]]
1518  //  CHECK-SAME:                 [%[[IV]], 16] [%[[SZ]], 8]
1519  //       CHECK:   %[[T1:.*]] = vector.transfer_write %[[VEC]]
1520  //  CHECK-SAME:                 %[[T0]][%[[C0]], %[[C0]]]
1521  //  CHECK-SAME:                 in_bounds = [true, false]
1522  //  CHECK-SAME:                 permutation_map = #[[$MAP]]
1523  //       CHECK:   %[[T2:.*]] = tensor.insert_slice %[[T1]] into %[[ITER_ARG]]
1524  //  CHECK-SAME:                 [%[[IV]], 16] [%[[SZ]], 8]
1525  %0 = vector.transfer_write %arg0, %arg1[%c0, %c0] {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<8x4xf32>, tensor<4x8xf32>
1526  %1 = tensor.extract_slice %0[0, 0] [%sz, 8] [1, 1] : tensor<4x8xf32> to tensor<?x8xf32>
1527  %2 = tensor.insert_slice %1 into %arg2[%iv, 16] [%sz, 8] [1, 1] : tensor<?x8xf32> into tensor<64x64xf32>
1528
1529  //       CHECK:   return %[[T2]]
1530  func.return %2 : tensor<64x64xf32>
1531}
1532
1533// -----
1534
// Negative cases for the transfer_write / extract_slice swap. The function
// holds three independent write -> extract_slice -> insert_slice chains, and
// the expected output keeps each chain in its original op order:
//   1. the extracted size (%iv) differs from the inserted size (%sz);
//   2. the written vector (4 elements) is smaller than the tensor (8 elements);
//   3. the extract_slice/insert_slice pair is rank-reducing (tensor<f32>).
1535// CHECK-LABEL: func @do_not_swap_extract_slice_transfer_write
1536//  CHECK-SAME:   %[[VEC:.*]]: vector<8xf32>,
1537//  CHECK-SAME:   %[[VEC_SMALL:.*]]: vector<4xf32>,
1538//  CHECK-SAME:   %[[INIT_TENSOR:.*]]: tensor<8xf32>,
1539//  CHECK-SAME:   %[[ITER_ARG:.*]]: tensor<64xf32>,
1540//  CHECK-SAME:   %[[IV:.*]]: index, %[[SZ:.*]]: index)
1541func.func @do_not_swap_extract_slice_transfer_write(%arg0 : vector<8xf32>,
1542                                                    %arg1 : vector<4xf32>,
1543                                                    %arg2 : tensor<8xf32>,
1544                                                    %arg3 : tensor<64xf32>,
1545                                                    %iv : index, %sz : index) -> (tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) {
1546  //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
1547  %c0 = arith.constant 0 : index
1548
1549  // Don't swap if the extracted and inserted slices do not match.
1550  //       CHECK:   %[[T0:.*]] = vector.transfer_write %[[VEC]]
1551  //       CHECK:   %[[T1:.*]] = tensor.extract_slice %[[T0]]
1552  //       CHECK:   %[[T2:.*]] = tensor.insert_slice %[[T1]]
1553  %0 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = [true]} : vector<8xf32>, tensor<8xf32>
1554  %1 = tensor.extract_slice %0[0] [%iv] [1] : tensor<8xf32> to tensor<?xf32>
1555  %2 = tensor.insert_slice %1 into %arg3[%iv] [%sz] [1] : tensor<?xf32> into tensor<64xf32>
1556
1557  // Don't swap if the TransferWriteOp takes a small vector.
1558  //       CHECK:   %[[T3:.*]] = vector.transfer_write %[[VEC_SMALL]]
1559  //       CHECK:   %[[T4:.*]] = tensor.extract_slice %[[T3]]
1560  //       CHECK:   %[[T5:.*]] = tensor.insert_slice %[[T4]]
1561  %3 = vector.transfer_write %arg1, %arg2[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<8xf32>
1562  %4 = tensor.extract_slice %3[0] [%sz] [1] : tensor<8xf32> to tensor<?xf32>
1563  %5 = tensor.insert_slice %4 into %arg3[%iv] [%sz] [1] : tensor<?xf32> into tensor<64xf32>
1564
1565  // Don't swap if one of the operations is rank-reducing.
1566  //       CHECK:   %[[T6:.*]] = vector.transfer_write %[[VEC]]
1567  //       CHECK:   %[[T7:.*]] = tensor.extract_slice %[[T6]]
1568  //       CHECK:   %[[T8:.*]] = tensor.insert_slice %[[T7]]
1569  %6 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = [true]} : vector<8xf32>, tensor<8xf32>
1570  %7 = tensor.extract_slice %6[0] [1] [1] : tensor<8xf32> to tensor<f32>
1571  %8 = tensor.insert_slice %7 into %arg3[%iv] [1] [1] : tensor<f32> into tensor<64xf32>
1572
1573  //       CHECK:   return %[[T2]], %[[T5]], %[[T8]]
1574  func.return %2, %5, %8 : tensor<64xf32>, tensor<64xf32>, tensor<64xf32>
1575}
1576
1577// -----
1578
// A vector.multi_reduction with an empty list of reduction dimensions: source
// and result have the same type. The expected output returns the first
// function argument (bound to %[[v]]) directly, with no reduction op left.
1579// CHECK-LABEL: func @vector_multi_reduction_single_parallel(
1580//  CHECK-SAME:     %[[v:.*]]: vector<2xf32>,
1581func.func @vector_multi_reduction_single_parallel(%arg0: vector<2xf32>, %acc: vector<2xf32>) -> vector<2xf32> {
1582    %0 = vector.multi_reduction <mul>, %arg0, %acc [] : vector<2xf32> to vector<2xf32>
1583
1584//       CHECK:     return %[[v]] : vector<2xf32>
1585    return %0 : vector<2xf32>
1586}
1587
1588// -----
1589
// Masked variant of the empty-reduction-dimension case: the multi_reduction is
// wrapped in vector.mask. The expected output still returns the first function
// argument (bound to %[[VAL_0]]) directly.
1590// CHECK-LABEL: func @masked_vector_multi_reduction_single_parallel(
1591//  CHECK-SAME:     %[[VAL_0:.*]]: vector<2xf32>, %{{.*}}: vector<2xf32>,
1592func.func @masked_vector_multi_reduction_single_parallel(%arg0: vector<2xf32>, %acc: vector<2xf32>, %mask: vector<2xi1>) -> vector<2xf32> {
1593    %0 = vector.mask %mask { vector.multi_reduction <mul>, %arg0, %acc [] : vector<2xf32> to vector<2xf32> } : vector<2xi1> -> vector<2xf32>
1594//       CHECK:   return %[[VAL_0]] : vector<2xf32>
1595    return %0 : vector<2xf32>
1596}
1597
1598// -----
1599
// Reduction over dimensions 1 and 3, both of which have size 1 in the source
// type vector<5x1x4x1x20xf32>. Expected output: the source is shape_cast to the
// result type and combined with the accumulator by arith.mulf (the reduction
// kind is <mul>); no multi_reduction remains.
1600// CHECK-LABEL: func @vector_multi_reduction_unit_dimensions(
1601//  CHECK-SAME: %[[SOURCE:.+]]: vector<5x1x4x1x20xf32>, %[[ACC:.+]]: vector<5x4x20xf32>
1602func.func @vector_multi_reduction_unit_dimensions(%source: vector<5x1x4x1x20xf32>, %acc: vector<5x4x20xf32>) -> vector<5x4x20xf32> {
1603//       CHECK:   %[[CAST:.+]] = vector.shape_cast  %[[SOURCE]] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
1604//       CHECK:   %[[RESULT:.+]] = arith.mulf  %[[ACC]], %[[CAST]] : vector<5x4x20xf32>
1605    %0 = vector.multi_reduction <mul>, %source, %acc [1, 3] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
1606
1607//       CHECK:     return %[[RESULT]] : vector<5x4x20xf32>
1608    return %0 : vector<5x4x20xf32>
1609}
1610
1611// -----
// Masked <add> reduction over the trailing unit dimension of a vector that
// also has a scalable dimension ([4]). Expected output: mask and source are
// shape_cast to the result shape, the cast source is added to the accumulator,
// and arith.select picks the sum where the mask is set and the cast source
// otherwise.
// NOTE(review): the select result (%[[VAL_6]]) is captured but the return is
// not checked.
1612// CHECK-LABEL:   func.func @vector_multi_reduction_scalable(
1613// CHECK-SAME:     %[[VAL_0:.*]]: vector<1x[4]x1xf32>,
1614// CHECK-SAME:     %[[VAL_1:.*]]: vector<1x[4]xf32>,
1615// CHECK-SAME:     %[[VAL_2:.*]]: vector<1x[4]x1xi1>)
1616func.func @vector_multi_reduction_scalable(%source: vector<1x[4]x1xf32>,
1617                                           %acc: vector<1x[4]xf32>,
1618                                           %mask: vector<1x[4]x1xi1>) -> vector<1x[4]xf32> {
1619// CHECK:           %[[VAL_3:.*]] = vector.shape_cast %[[VAL_2]] : vector<1x[4]x1xi1> to vector<1x[4]xi1>
1620// CHECK:           %[[VAL_4:.*]] = vector.shape_cast %[[VAL_0]] : vector<1x[4]x1xf32> to vector<1x[4]xf32>
1621// CHECK:           %[[VAL_5:.*]] = arith.addf %[[VAL_1]], %[[VAL_4]] : vector<1x[4]xf32>
1622// CHECK:           %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : vector<1x[4]xi1>, vector<1x[4]xf32>
1623    %0 = vector.mask %mask { vector.multi_reduction <add>, %source, %acc [2] : vector<1x[4]x1xf32> to vector<1x[4]xf32> } :
1624          vector<1x[4]x1xi1> -> vector<1x[4]xf32>
1625
1626    return %0 : vector<1x[4]xf32>
1627}
1628
1629// -----
1630
// Masked <mul> reduction over the unit dimensions 1 and 3. Expected output:
// mask and source are shape_cast to the result shape, the cast source is
// multiplied with the accumulator, and arith.select picks the product where
// the mask is set and the cast source otherwise.
// NOTE(review): the select result (%[[VAL_6]]) is captured but the return is
// not checked.
1631// CHECK-LABEL: func @masked_vector_multi_reduction_unit_dimensions
1632//  CHECK-SAME: %[[VAL_0:.*]]: vector<5x1x4x1x20xf32>, %[[VAL_1:.*]]: vector<5x4x20xf32>,
1633//  CHECK-SAME: %[[VAL_2:.*]]: vector<5x1x4x1x20xi1>)
1634func.func @masked_vector_multi_reduction_unit_dimensions(%source: vector<5x1x4x1x20xf32>,
1635                                                         %acc: vector<5x4x20xf32>,
1636                                                         %mask: vector<5x1x4x1x20xi1>) -> vector<5x4x20xf32> {
1637//       CHECK:   %[[VAL_3:.*]] = vector.shape_cast %[[VAL_2]] : vector<5x1x4x1x20xi1> to vector<5x4x20xi1>
1638//       CHECK:   %[[VAL_4:.*]] = vector.shape_cast %[[VAL_0]] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
1639//       CHECK:   %[[VAL_5:.*]] = arith.mulf %[[VAL_1]], %[[VAL_4]] : vector<5x4x20xf32>
1640//       CHECK:   %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : vector<5x4x20xi1>, vector<5x4x20xf32>
1641%0 = vector.mask %mask { vector.multi_reduction <mul>, %source, %acc [1, 3] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32> } :
1642           vector<5x1x4x1x20xi1> -> vector<5x4x20xf32>
1643    return %0 : vector<5x4x20xf32>
1644}
1645
1646// -----
1647
// Negative case: the reduction dimensions are [1, 2], and dimension 2 has
// size 4 in vector<5x1x4x1x20xf32>, so not every reduced dimension is a unit
// dimension. The expected output keeps the vector.multi_reduction unchanged.
1648// CHECK-LABEL: func @vector_multi_reduction_unit_dimensions_fail(
1649//  CHECK-SAME: %[[SRC:.+]]: vector<5x1x4x1x20xf32>, %[[ACCUM:.+]]: vector<5x1x20xf32>
1650func.func @vector_multi_reduction_unit_dimensions_fail(%source: vector<5x1x4x1x20xf32>, %acc: vector<5x1x20xf32>) -> vector<5x1x20xf32> {
1651//       CHECK:   %[[RES:.+]] = vector.multi_reduction  <mul>, %[[SRC]], %[[ACCUM]] [1, 2] : vector<5x1x4x1x20xf32> to vector<5x1x20xf32>
1652    %0 = vector.multi_reduction <mul>, %source, %acc [1, 2] : vector<5x1x4x1x20xf32> to vector<5x1x20xf32>
1653
1654//       CHECK:     return %[[RES]] : vector<5x1x20xf32>
1655    return %0 : vector<5x1x20xf32>
1656}
1657
1658// -----
1659
// All three dimensions of vector<1x1x1xf32> are reduced, producing a scalar
// f32. Expected output: the single element is read with vector.extract at
// [0, 0, 0] and multiplied with the scalar accumulator by arith.mulf.
1660// CHECK-LABEL: func @vector_multi_reduction_unit_dimensions_single_elem(
1661//  CHECK-SAME: %[[SOURCE:.+]]: vector<1x1x1xf32>, %[[ACC:.+]]: f32
1662func.func @vector_multi_reduction_unit_dimensions_single_elem(%source: vector<1x1x1xf32>, %acc: f32) -> f32 {
1663//       CHECK:   %[[CAST:.+]] = vector.extract  %[[SOURCE]][0, 0, 0] : f32 from vector<1x1x1xf32>
1664//       CHECK:   %[[RESULT:.+]] = arith.mulf  %[[ACC]], %[[CAST]] : f32
1665    %0 = vector.multi_reduction <mul>, %source, %acc [0,1,2] : vector<1x1x1xf32> to f32
1666
1667//       CHECK:     return %[[RESULT]] : f32
1668    return %0 : f32
1669}
1670
1671// -----
1672
// Masked variant of the single-element reduction to a scalar. Expected output:
// the mask bit and the source element are each read with vector.extract at
// [0, 0, 0], the element is multiplied with the accumulator, and arith.select
// picks the product when the mask bit is set and the extracted element
// otherwise.
// NOTE(review): the select result (%[[VAL_6]]) is captured but the return is
// not checked.
1673// CHECK-LABEL: func @masked_vector_multi_reduction_unit_dimensions_single_elem(
1674//  CHECK-SAME: %[[VAL_0:.*]]: vector<1x1x1xf32>, %[[VAL_1:.*]]: f32,
1675//  CHECK-SAME: %[[VAL_2:.*]]: vector<1x1x1xi1>)
1676func.func @masked_vector_multi_reduction_unit_dimensions_single_elem(%source: vector<1x1x1xf32>, %acc: f32, %mask: vector<1x1x1xi1>) -> f32 {
1677      // CHECK:           %[[VAL_3:.*]] = vector.extract %[[VAL_2]][0, 0, 0] : i1 from vector<1x1x1xi1>
1678      // CHECK:           %[[VAL_4:.*]] = vector.extract %[[VAL_0]][0, 0, 0] : f32 from vector<1x1x1xf32>
1679      // CHECK:           %[[VAL_5:.*]] = arith.mulf %[[VAL_1]], %[[VAL_4]] : f32
1680      // CHECK:           %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : f32
1681  %0 = vector.mask %mask { vector.multi_reduction <mul>, %source, %acc [0,1,2] : vector<1x1x1xf32> to f32 } : vector<1x1x1xi1> -> f32
1682    return %0 : f32
1683}
1684
1685// -----
1686
// insert_strided_slice whose source has the same type as the destination and
// is inserted at offsets [0, 0] with unit strides, so it overwrites every
// element. The expected output returns the source argument directly.
1687// CHECK-LABEL: func @insert_strided_slice_full_range
1688//  CHECK-SAME: %[[SOURCE:.+]]: vector<16x16xf16>, %{{.+}}: vector<16x16xf16>
1689func.func @insert_strided_slice_full_range(%source: vector<16x16xf16>, %dest: vector<16x16xf16>) -> vector<16x16xf16> {
1690  %0 = vector.insert_strided_slice %source, %dest {offsets = [0, 0], strides = [1, 1]} : vector<16x16xf16> into vector<16x16xf16>
1691  // CHECK: return %[[SOURCE]]
1692  return %0: vector<16x16xf16>
1693}
1694
1695// -----
1696
// extract_strided_slice applied to the result of vector.splat. The expected
// output is a single vector.splat that directly produces the smaller
// vector<2x4xf16>, immediately followed by the return.
1697// CHECK-LABEL: extract_strided_splat
1698//       CHECK:   %[[B:.*]] = vector.splat %{{.*}} : vector<2x4xf16>
1699//  CHECK-NEXT:   return %[[B]] : vector<2x4xf16>
1700func.func @extract_strided_splat(%arg0: f16) -> vector<2x4xf16> {
1701 %0 = vector.splat %arg0 : vector<16x4xf16>
1702 %1 = vector.extract_strided_slice %0
1703  {offsets = [1, 0], sizes = [2, 4], strides = [1, 1]} :
1704  vector<16x4xf16> to vector<2x4xf16>
1705  return %1 : vector<2x4xf16>
1706}
1707
1708// -----
1709
// Two independent ops on a vector<1x1x4xf32>: an extract of the only
// vector<4xf32> row at [0, 0], and an insert of %arg1 into that same position.
// Expected output: the extract is kept as a vector.extract, while the insert
// (which replaces the whole destination) becomes a vector.broadcast of %arg1
// to vector<1x1x4xf32>.
1710// CHECK-LABEL: func @insert_extract_to_broadcast
1711//  CHECK-SAME: (%[[ARG0:.*]]: vector<1x1x4xf32>, %[[ARG1:.*]]: vector<4xf32>)
1712//       CHECK:   %[[V0:.*]] = vector.extract %[[ARG0]][0, 0] : vector<4xf32> from vector<1x1x4xf32>
1713//       CHECK:   %[[V1:.*]] = vector.broadcast %[[ARG1]] : vector<4xf32> to vector<1x1x4xf32>
1714//       CHECK:   return %[[V0]], %[[V1]] : vector<4xf32>, vector<1x1x4xf32>
1715func.func @insert_extract_to_broadcast(%arg0 : vector<1x1x4xf32>,
1716  %arg1 : vector<4xf32>) -> (vector<4xf32>, vector<1x1x4xf32>) {
1717  %0 = vector.extract %arg0[0, 0] : vector<4xf32> from vector<1x1x4xf32>
1718  %1 = vector.insert %arg1, %arg0 [0, 0] : vector<4xf32> into vector<1x1x4xf32>
1719  return %0, %1 : vector<4xf32>, vector<1x1x4xf32>
1720}
1721
1722// -----
1723
// vector.extract from splat constants. Extracting a row of the f32 splat
// yields a smaller splat constant (dense<2.0> : vector<7xf32>); extracting a
// single element of the i32 splat yields the scalar constant 1. The expected
// output contains only these two constants followed by the return.
1724// CHECK-LABEL: func.func @extract_splat_constant
1725//   CHECK-DAG:   %[[CST1:.*]] = arith.constant 1 : i32
1726//   CHECK-DAG:   %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<7xf32>
1727//  CHECK-NEXT:   return %[[CST0]], %[[CST1]] : vector<7xf32>, i32
1728func.func @extract_splat_constant() -> (vector<7xf32>, i32) {
1729  %cst = arith.constant dense<2.000000e+00> : vector<29x7xf32>
1730  %cst_1 = arith.constant dense<1> : vector<4x37x9xi32>
1731  %0 = vector.extract %cst[2] : vector<7xf32> from vector<29x7xf32>
1732  %1 = vector.extract %cst_1[1, 4, 5] : i32 from vector<4x37x9xi32>
1733  return %0, %1 : vector<7xf32>, i32
1734}
1735
1736// -----
1737
// Scalar vector.extract from non-splat 1-D constants of element types i32,
// index and f32. Each extract is expected to fold to the selected element:
// [1, 2, 3, 4][2] = 3, [0, 1, 2][1] = 1, and [2.0, 3.0, 4.0][0] = 2.0.
1738// CHECK-LABEL: func.func @extract_1d_constant
1739//   CHECK-DAG: %[[I32CST:.*]] = arith.constant 3 : i32
1740//   CHECK-DAG: %[[IDXCST:.*]] = arith.constant 1 : index
1741//   CHECK-DAG: %[[F32CST:.*]] = arith.constant 2.000000e+00 : f32
1742//  CHECK-NEXT: return %[[I32CST]], %[[IDXCST]], %[[F32CST]] : i32, index, f32
1743func.func @extract_1d_constant() -> (i32, index, f32) {
1744  %icst = arith.constant dense<[1, 2, 3, 4]> : vector<4xi32>
1745  %e = vector.extract %icst[2] : i32 from vector<4xi32>
1746  %idx_cst = arith.constant dense<[0, 1, 2]> : vector<3xindex>
1747  %f = vector.extract %idx_cst[1] : index from vector<3xindex>
1748  %fcst = arith.constant dense<[2.000000e+00, 3.000000e+00, 4.000000e+00]> : vector<3xf32>
1749  %g = vector.extract %fcst[0] : f32 from vector<3xf32>
1750  return %e, %f, %g : i32, index, f32
1751}
1752
1753// -----
1754
// Scalar vector.extract from a non-splat 2x3 constant [[0, 1, 2], [3, 4, 5]].
// The four corner positions [0, 0], [0, 2], [1, 0] and [1, 2] are expected to
// fold to the scalar constants 0, 2, 3 and 5 respectively.
1755// CHECK-LABEL: func.func @extract_2d_constant
1756//   CHECK-DAG: %[[ACST:.*]] = arith.constant 0 : i32
1757//   CHECK-DAG: %[[BCST:.*]] = arith.constant 2 : i32
1758//   CHECK-DAG: %[[CCST:.*]] = arith.constant 3 : i32
1759//   CHECK-DAG: %[[DCST:.*]] = arith.constant 5 : i32
1760//  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : i32, i32, i32, i32
1761func.func @extract_2d_constant() -> (i32, i32, i32, i32) {
1762  %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
1763  %a = vector.extract %cst[0, 0] : i32 from vector<2x3xi32>
1764  %b = vector.extract %cst[0, 2] : i32 from vector<2x3xi32>
1765  %c = vector.extract %cst[1, 0] : i32 from vector<2x3xi32>
1766  %d = vector.extract %cst[1, 2] : i32 from vector<2x3xi32>
1767  return %a, %b, %c, %d : i32, i32, i32, i32
1768}
1769
1770// -----
1771
// Row extraction (result is a vector, not a scalar) from a non-splat 2x3
// constant. Rows 0 and 1 are expected to fold to the 1-D constants
// [0, 1, 2] and [3, 4, 5].
1772// CHECK-LABEL: func.func @extract_vector_2d_constant
1773//   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1774//   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[3, 4, 5]> : vector<3xi32>
1775//  CHECK-NEXT: return %[[ACST]], %[[BCST]] : vector<3xi32>, vector<3xi32>
1776func.func @extract_vector_2d_constant() -> (vector<3xi32>, vector<3xi32>) {
1777  %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
1778  %a = vector.extract %cst[0] : vector<3xi32> from vector<2x3xi32>
1779  %b = vector.extract %cst[1] : vector<3xi32> from vector<2x3xi32>
1780  return %a, %b : vector<3xi32>, vector<3xi32>
1781}
1782
1783// -----
1784
// Scalar vector.extract from a non-splat 2x3x2 constant holding the values
// 0..11 in row-major order. Positions [0, 0, 0], [0, 0, 1], [1, 1, 1] and
// [1, 2, 0] are expected to fold to the scalar constants 0, 1, 9 and 10.
1785// CHECK-LABEL: func.func @extract_3d_constant
1786//   CHECK-DAG: %[[ACST:.*]] = arith.constant 0 : i32
1787//   CHECK-DAG: %[[BCST:.*]] = arith.constant 1 : i32
1788//   CHECK-DAG: %[[CCST:.*]] = arith.constant 9 : i32
1789//   CHECK-DAG: %[[DCST:.*]] = arith.constant 10 : i32
1790//  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : i32, i32, i32, i32
1791func.func @extract_3d_constant() -> (i32, i32, i32, i32) {
1792  %cst = arith.constant dense<[[[0, 1], [2, 3], [4, 5]], [[6, 7], [8, 9], [10, 11]]]> : vector<2x3x2xi32>
1793  %a = vector.extract %cst[0, 0, 0] : i32 from vector<2x3x2xi32>
1794  %b = vector.extract %cst[0, 0, 1] : i32 from vector<2x3x2xi32>
1795  %c = vector.extract %cst[1, 1, 1] : i32 from vector<2x3x2xi32>
1796  %d = vector.extract %cst[1, 2, 0] : i32 from vector<2x3x2xi32>
1797  return %a, %b, %c, %d : i32, i32, i32, i32
1798}
1799
1800// -----
1801
// Sub-vector extraction from a non-splat 2x3x2 constant at two different
// depths: one leading index yields a 3x2 slice, two leading indices yield a
// 2-element row. Each extract is expected to fold to the matching dense
// constant. The 2-D expected constants are wrapped in {{...}} with escaped
// brackets because "[[" would otherwise start a FileCheck variable.
1802// CHECK-LABEL: func.func @extract_vector_3d_constant
1803//   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
1804//   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[6, 7\], \[8, 9\], \[10, 11\]\]}}> : vector<3x2xi32>
1805//   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[8, 9]> : vector<2xi32>
1806//   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<[10, 11]> : vector<2xi32>
1807//  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>
1808func.func @extract_vector_3d_constant() -> (vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>) {
1809  %cst = arith.constant dense<[[[0, 1], [2, 3], [4, 5]], [[6, 7], [8, 9], [10, 11]]]> : vector<2x3x2xi32>
1810  %a = vector.extract %cst[0] : vector<3x2xi32> from vector<2x3x2xi32>
1811  %b = vector.extract %cst[1] : vector<3x2xi32> from vector<2x3x2xi32>
1812  %c = vector.extract %cst[1, 1] : vector<2xi32> from vector<2x3x2xi32>
1813  %d = vector.extract %cst[1, 2] : vector<2xi32> from vector<2x3x2xi32>
1814  return %a, %b, %c, %d : vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>
1815}
1816
1817// -----
1818
// Row extraction from a non-splat 2x3x2 constant whose innermost rows each
// repeat a single value ([0, 0], [1, 1], ...). The extracted rows at [0, 0],
// [1, 1] and [1, 2] are expected to fold to constants printed in splat form:
// dense<0>, dense<4> and dense<5>.
1819// CHECK-LABEL: func.func @extract_splat_vector_3d_constant
1820//   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<2xi32>
1821//   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<4> : vector<2xi32>
1822//   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<5> : vector<2xi32>
1823//  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<2xi32>, vector<2xi32>, vector<2xi32>
1824func.func @extract_splat_vector_3d_constant() -> (vector<2xi32>, vector<2xi32>, vector<2xi32>) {
1825  %cst = arith.constant dense<[[[0, 0], [1, 1], [2, 2]], [[3, 3], [4, 4], [5, 5]]]> : vector<2x3x2xi32>
1826  %a = vector.extract %cst[0, 0] : vector<2xi32> from vector<2x3x2xi32>
1827  %b = vector.extract %cst[1, 1] : vector<2xi32> from vector<2x3x2xi32>
1828  %c = vector.extract %cst[1, 2] : vector<2xi32> from vector<2x3x2xi32>
1829  return %a, %b, %c : vector<2xi32>, vector<2xi32>, vector<2xi32>
1830}
1831
1832// -----
1833
// extract_strided_slice from a non-splat 1-D constant [0, 1, 2] with unit
// stride: the full range, the last two elements, and the last element only.
// Each slice is expected to fold to a dense constant; the single-element
// slice is printed in splat form (dense<2>).
1834// CHECK-LABEL: func.func @extract_strided_slice_1d_constant
1835//   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1836//   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[1, 2]> : vector<2xi32>
1837//   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<2> : vector<1xi32>
1838//  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<3xi32>, vector<2xi32>, vector<1xi32>
1839func.func @extract_strided_slice_1d_constant() -> (vector<3xi32>, vector<2xi32>, vector<1xi32>) {
1840  %cst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1841  %a = vector.extract_strided_slice %cst
1842   {offsets = [0], sizes = [3], strides = [1]} : vector<3xi32> to vector<3xi32>
1843  %b = vector.extract_strided_slice %cst
1844   {offsets = [1], sizes = [2], strides = [1]} : vector<3xi32> to vector<2xi32>
1845  %c = vector.extract_strided_slice %cst
1846   {offsets = [2], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32>
1847  return %a, %b, %c : vector<3xi32>, vector<2xi32>, vector<1xi32>
1848}
1849
1850// -----
1851
// extract_strided_slice from a non-splat 2x3 constant [[0, 1, 2], [3, 4, 5]]:
// a 1x1 slice at the origin, a 1x2 slice at [1, 1], and a 2x2 slice at [0, 1].
// Each is expected to fold to the dense constant holding the selected
// elements (0; [[4, 5]]; [[1, 2], [4, 5]]).
1852// CHECK-LABEL: func.func @extract_strided_slice_2d_constant
1853//   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<1x1xi32>
1854//   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[4, 5\]\]}}> : vector<1x2xi32>
1855//   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[1, 2\], \[4, 5\]\]}}> : vector<2x2xi32>
1856//  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>
1857func.func @extract_strided_slice_2d_constant() -> (vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>) {
1858  %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
1859  %a = vector.extract_strided_slice %cst
1860   {offsets = [0, 0], sizes = [1, 1], strides = [1, 1]} : vector<2x3xi32> to vector<1x1xi32>
1861  %b = vector.extract_strided_slice %cst
1862   {offsets = [1, 1], sizes = [1, 2], strides = [1, 1]} : vector<2x3xi32> to vector<1x2xi32>
1863  %c = vector.extract_strided_slice %cst
1864   {offsets = [0, 1], sizes = [2, 2], strides = [1, 1]} : vector<2x3xi32> to vector<2x2xi32>
1865  return %a, %b, %c : vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>
1866}
1867
1868// -----
1869
// extract_strided_slice from a non-splat 3x2x2 constant holding 0..11 in
// row-major order. The four slices specify offsets/sizes for one, two, three
// and three leading dimensions respectively; dimensions not listed are taken
// in full, as the result types show. Each slice is expected to fold to the
// dense constant of the selected elements.
1870// CHECK-LABEL: func.func @extract_strided_slice_3d_constant
1871//   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[\[8, 9\], \[10, 11\]\]\]}}> : vector<1x2x2xi32>
1872//   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[\[2, 3\]\]\]}}> : vector<1x1x2xi32>
1873//   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[\[6, 7\]\], \[\[10, 11\]\]\]}}> : vector<2x1x2xi32>
1874//   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<11> : vector<1x1x1xi32>
1875//  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]]
1876func.func @extract_strided_slice_3d_constant() -> (vector<1x2x2xi32>, vector<1x1x2xi32>, vector<2x1x2xi32>, vector<1x1x1xi32>) {
1877  %cst = arith.constant dense<[[[0, 1], [2, 3]], [[4, 5], [6, 7]], [[8, 9], [10, 11]]]> : vector<3x2x2xi32>
1878  %a = vector.extract_strided_slice %cst
1879   {offsets = [2], sizes = [1], strides = [1]} : vector<3x2x2xi32> to vector<1x2x2xi32>
1880  %b = vector.extract_strided_slice %cst
1881   {offsets = [0, 1], sizes = [1, 1], strides = [1, 1]} : vector<3x2x2xi32> to vector<1x1x2xi32>
1882  %c = vector.extract_strided_slice %cst
1883   {offsets = [1, 1, 0], sizes = [2, 1, 2], strides = [1, 1, 1]} : vector<3x2x2xi32> to vector<2x1x2xi32>
1884  %d = vector.extract_strided_slice %cst
1885   {offsets = [2, 1, 1], sizes = [1, 1, 1], strides = [1, 1, 1]} : vector<3x2x2xi32> to vector<1x1x1xi32>
1886  return %a, %b, %c, %d : vector<1x2x2xi32>, vector<1x1x2xi32>, vector<2x1x2xi32>, vector<1x1x1xi32>
1887}
1888
1889// -----
1890
// vector.extract applied to the result of extract_strided_slice. The slice
// starts at offsets [7, 3] and the extract reads position [2, 4] inside it, so
// the expected output is one vector.extract from the original vector at
// [7 + 2, 3 + 4] = [9, 7].
1891// CHECK-LABEL: extract_extract_strided
1892//  CHECK-SAME: %[[A:.*]]: vector<32x16x4xf16>
1893//       CHECK: %[[V:.*]] = vector.extract %[[A]][9, 7] : vector<4xf16> from vector<32x16x4xf16>
1894//       CHECK: return %[[V]] : vector<4xf16>
1895func.func @extract_extract_strided(%arg0: vector<32x16x4xf16>) -> vector<4xf16> {
1896 %1 = vector.extract_strided_slice %arg0
1897  {offsets = [7, 3], sizes = [10, 8], strides = [1, 1]} :
1898  vector<32x16x4xf16> to vector<10x8x4xf16>
1899  %2 = vector.extract %1[2, 4] : vector<4xf16> from vector<10x8x4xf16>
1900  return %2 : vector<4xf16>
1901}
1902
1903// -----
1904
// Scalar vector.extract from the result of insert_strided_slice, at a position
// that lies inside the inserted region. %a (6x4) is inserted at offsets
// [2, 2]; the extract reads [2, 4], which corresponds to %a[2 - 2, 4 - 2] =
// %a[0, 2]. The expected output extracts directly from %a.
1905// CHECK-LABEL: extract_insert_strided
1906//  CHECK-SAME: %[[A:.*]]: vector<6x4xf32>
1907//       CHECK: %[[V:.*]] = vector.extract %[[A]][0, 2] : f32 from vector<6x4xf32>
1908//       CHECK: return %[[V]] : f32
1909func.func @extract_insert_strided(%a: vector<6x4xf32>, %b: vector<8x16xf32>)
1910  -> f32 {
1911  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
1912    : vector<6x4xf32> into vector<8x16xf32>
1913  %2 = vector.extract %0[2, 4] : f32 from vector<8x16xf32>
1914  return %2 : f32
1915}
1916
1917// -----
1918
// Same forwarding as the 2-D case, but the inserted source has lower rank than
// the destination: a vector<4xf32> is inserted into a vector<8x16xf32> at
// offsets [2, 2]. Extracting [2, 4] from the result is expected to become an
// extract of element 4 - 2 = 2 from the 1-D source.
1919// CHECK-LABEL: extract_insert_rank_reduce
1920//  CHECK-SAME: %[[A:.*]]: vector<4xf32>
1921//       CHECK: %[[V:.*]] = vector.extract %[[A]][2] : f32 from vector<4xf32>
1922//       CHECK: return %[[V]] : f32
1923func.func @extract_insert_rank_reduce(%a: vector<4xf32>, %b: vector<8x16xf32>)
1924  -> f32 {
1925  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1]}
1926    : vector<4xf32> into vector<8x16xf32>
1927  %2 = vector.extract %0[2, 4] : f32 from vector<8x16xf32>
1928  return %2 : f32
1929}
1930
1931// -----
1932
// Negative case for extract-of-insert_strided_slice forwarding: both ops are
// expected to remain in the output. The inserted rows are 15 elements wide
// while the extracted row is 16 elements wide, so the extracted value is only
// partially covered by the inserted source (presumably the reason the fold
// must not apply).
1933// CHECK-LABEL: extract_insert_negative
1934//       CHECK: vector.insert_strided_slice
1935//       CHECK: vector.extract
1936func.func @extract_insert_negative(%a: vector<2x15xf32>, %b: vector<12x8x16xf32>)
1937  -> vector<16xf32> {
1938  %0 = vector.insert_strided_slice %a, %b {offsets = [4, 2, 0], strides = [1, 1]}
1939    : vector<2x15xf32> into vector<12x8x16xf32>
1940  %2 = vector.extract %0[4, 2] : vector<16xf32> from vector<12x8x16xf32>
1941  return %2 : vector<16xf32>
1942}
1943
1944// -----
1945
// Extract through a chain of two insert_strided_slice ops. %c is inserted at
// offsets [4, 2, 0], then %a is inserted at offsets [0, 2, 0] on top of that
// result. The extract at [4, 2] is expected to be forwarded past the second
// insertion to the first one and become an extract of row 0 of %c.
1946// CHECK-LABEL: extract_insert_chain
1947//  CHECK-SAME: (%[[A:.*]]: vector<2x16xf32>, %[[B:.*]]: vector<12x8x16xf32>, %[[C:.*]]: vector<2x16xf32>)
1948//       CHECK: %[[V:.*]] = vector.extract %[[C]][0] : vector<16xf32> from vector<2x16xf32>
1949//       CHECK: return %[[V]] : vector<16xf32>
1950func.func @extract_insert_chain(%a: vector<2x16xf32>, %b: vector<12x8x16xf32>, %c: vector<2x16xf32>)
1951  -> vector<16xf32> {
1952  %0 = vector.insert_strided_slice %c, %b {offsets = [4, 2, 0], strides = [1, 1]}
1953    : vector<2x16xf32> into vector<12x8x16xf32>
1954  %1 = vector.insert_strided_slice %a, %0 {offsets = [0, 2, 0], strides = [1, 1]}
1955    : vector<2x16xf32> into vector<12x8x16xf32>
1956  %2 = vector.extract %1[4, 2] : vector<16xf32> from vector<12x8x16xf32>
1957  return %2 : vector<16xf32>
1958}
1959
1960// -----
1961
// Negative case: a vector.extract with a dynamic index (%index) feeds a
// vector.extract with a static index. The expected output keeps both extracts
// as separate ops instead of merging them into one.
1962// CHECK-LABEL: extract_from_extract_chain_should_not_fold_dynamic_extracts
1963//  CHECK-SAME: (%[[VEC:.*]]: vector<2x4xf32>, %[[IDX:.*]]: index)
1964//       CHECK: %[[A:.*]] = vector.extract %[[VEC]][%[[IDX]]] : vector<4xf32> from vector<2x4xf32>
1965//       CHECK: %[[B:.*]] = vector.extract %[[A]][1] : f32 from vector<4xf32>
1966func.func @extract_from_extract_chain_should_not_fold_dynamic_extracts(%v: vector<2x4xf32>, %index: index) -> f32 {
1967  %0 = vector.extract %v[%index] : vector<4xf32> from vector<2x4xf32>
1968  %1 = vector.extract %0[1] : f32 from vector<4xf32>
1969  return %1 : f32
1970}
1971
1972// -----
1973
// extract_strided_slice selecting a single row (offset [1, 0], size [1, 4])
// followed by a vector.extract of row 0 of that slice. The expected output is
// one vector.extract of row 1 from the original vector.
1974// CHECK-LABEL: extract_extract_strided2
1975//  CHECK-SAME: %[[A:.*]]: vector<2x4xf32>
1976//       CHECK: %[[V:.*]] = vector.extract %[[A]][1] : vector<4xf32> from vector<2x4xf32>
1977//       CHECK: return %[[V]] : vector<4xf32>
1978func.func @extract_extract_strided2(%A: vector<2x4xf32>)
1979  -> (vector<4xf32>) {
1980 %0 = vector.extract_strided_slice %A {offsets = [1, 0], sizes = [1, 4], strides = [1, 1]} : vector<2x4xf32> to vector<1x4xf32>
1981 %1 = vector.extract %0[0] : vector<4xf32> from vector<1x4xf32>
1982 return %1 : vector<4xf32>
1983}
1984
1985// -----
1986
// vector.splat of a constant scalar. The expected output is a single dense
// splat constant of the vector type, directly followed by the return; the
// "-NEXT" directives leave no room for any other op in the function body.
1987// CHECK-LABEL: func @splat_fold
1988func.func @splat_fold() -> vector<4xf32> {
1989  %c = arith.constant 1.0 : f32
1990  %v = vector.splat %c : vector<4xf32>
1991  return %v : vector<4xf32>
1992
1993  // CHECK-NEXT: [[V:%.*]] = arith.constant dense<1.000000e+00> : vector<4xf32>
1994  // CHECK-NEXT: return [[V]] : vector<4xf32>
1995}
1996
1997// -----
1998
// vector.shuffle of two constant operands. With %v0 = [0, 1, 2] and
// %v1 = [3, 4, 5] concatenated as [0, 1, 2, 3, 4, 5], the mask [3, 2, 5, 1]
// selects the values [3, 2, 5, 1]; the expected output is that dense constant.
1999// CHECK-LABEL: func @shuffle_1d
2000//       CHECK:   %[[V:.+]] = arith.constant dense<[3, 2, 5, 1]> : vector<4xi32>
2001//       CHECK:   return %[[V]]
2002func.func @shuffle_1d() -> vector<4xi32> {
2003  %v0 = arith.constant dense<[0, 1, 2]> : vector<3xi32>
2004  %v1 = arith.constant dense<[3, 4, 5]> : vector<3xi32>
2005  %shuffle = vector.shuffle %v0, %v1 [3, 2, 5, 1] : vector<3xi32>, vector<3xi32>
2006  return %shuffle : vector<4xi32>
2007}
2008
// vector.shuffle of two 0-D vectors with the single-entry mask [0]. The
// expected output contains a vector.broadcast from vector<i32> to
// vector<1xi32>; the broadcast operand itself is not pinned by the pattern.
2009// CHECK-LABEL: func @shuffle_canonicalize_0d
2010func.func @shuffle_canonicalize_0d(%v0 : vector<i32>, %v1 : vector<i32>) -> vector<1xi32> {
2011  // CHECK: vector.broadcast %{{.*}} : vector<i32> to vector<1xi32>
2012  %shuffle = vector.shuffle %v0, %v1 [0] : vector<i32>, vector<i32>
2013  return %shuffle : vector<1xi32>
2014}
2015
// vector.shuffle whose mask [0, 1, 2, 3] selects exactly the four elements of
// the first operand in order; the shuffle is expected to fold to that operand.
// NOTE(review): the expectation below matches "%arg0 : vector<4xi32>" without
// a leading "return", unlike the rank-3 variants shuffle_fold3/shuffle_fold4;
// consider tightening it for consistency.
2016// CHECK-LABEL: func @shuffle_fold1
2017//       CHECK:   %arg0 : vector<4xi32>
2018func.func @shuffle_fold1(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<4xi32> {
2019  %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3] : vector<4xi32>, vector<2xi32>
2020  return %shuffle : vector<4xi32>
2021}
2022
// vector.shuffle whose mask [4, 5] selects exactly the two elements of the
// second operand in order (indices continue after the 4 elements of the
// first operand); the shuffle is expected to fold to the second operand.
// NOTE(review): the expectation below matches "%arg1 : vector<2xi32>" without
// a leading "return", unlike the rank-3 variants shuffle_fold3/shuffle_fold4;
// consider tightening it for consistency.
2023// CHECK-LABEL: func @shuffle_fold2
2024//       CHECK:   %arg1 : vector<2xi32>
2025func.func @shuffle_fold2(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<2xi32> {
2026  %shuffle = vector.shuffle %v0, %v1 [4, 5] : vector<4xi32>, vector<2xi32>
2027  return %shuffle : vector<2xi32>
2028}
2029
// Rank-3 variant of the identity shuffle on the first operand: the mask
// [0, 1, 2, 3] indexes the leading dimension of vector<4x5x6xi32>. The
// expected output returns the first argument directly.
2030// CHECK-LABEL: func @shuffle_fold3
2031//       CHECK:   return %arg0 : vector<4x5x6xi32>
2032func.func @shuffle_fold3(%v0 : vector<4x5x6xi32>, %v1 : vector<2x5x6xi32>) -> vector<4x5x6xi32> {
2033  %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3] : vector<4x5x6xi32>, vector<2x5x6xi32>
2034  return %shuffle : vector<4x5x6xi32>
2035}
2036
// Rank-3 variant of the identity shuffle on the second operand: the mask
// [4, 5] selects both leading-dimension slices of vector<2x5x6xi32>. The
// expected output returns the second argument directly.
2037// CHECK-LABEL: func @shuffle_fold4
2038//       CHECK:   return %arg1 : vector<2x5x6xi32>
2039func.func @shuffle_fold4(%v0 : vector<4x5x6xi32>, %v1 : vector<2x5x6xi32>) -> vector<2x5x6xi32> {
2040  %shuffle = vector.shuffle %v0, %v1 [4, 5] : vector<4x5x6xi32>, vector<2x5x6xi32>
2041  return %shuffle : vector<2x5x6xi32>
2042}
2043
// Negative case: the mask [0, 1, 2, 3, 4] takes all of the first operand plus
// one element of the second, so the result equals neither operand. The
// expected output keeps the vector.shuffle unchanged.
2044// CHECK-LABEL: func @shuffle_nofold1
2045//       CHECK:   %[[V:.+]] = vector.shuffle %arg0, %arg1 [0, 1, 2, 3, 4] : vector<4xi32>, vector<2xi32>
2046//       CHECK:   return %[[V]]
2047func.func @shuffle_nofold1(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<5xi32> {
2048  %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3, 4] : vector<4xi32>, vector<2xi32>
2049  return %shuffle : vector<5xi32>
2050}
2051
2052// -----
2053
2054// CHECK-LABEL: func @transpose_scalar_broadcast1
2055//  CHECK-SAME: (%[[ARG:.+]]: vector<1xf32>)
2056//       CHECK:   %[[V:.+]] = vector.broadcast %[[ARG]] : vector<1xf32> to vector<1x8xf32>
2057//       CHECK:   return %[[V]] : vector<1x8xf32>
2058func.func @transpose_scalar_broadcast1(%src: vector<1xf32>) -> vector<1x8xf32> {
2059  %wide = vector.broadcast %src : vector<1xf32> to vector<8x1xf32>
2060  %transposed = vector.transpose %wide, [1, 0] : vector<8x1xf32> to vector<1x8xf32>
2061  return %transposed : vector<1x8xf32>
2062}
2063
2064// -----
2065
2066// CHECK-LABEL: func @transpose_scalar_broadcast2
2067//  CHECK-SAME: (%[[ARG:.+]]: f32)
2068//       CHECK:   %[[V:.+]] = vector.broadcast %[[ARG]] : f32 to vector<1x8xf32>
2069//       CHECK:   return %[[V]] : vector<1x8xf32>
2070func.func @transpose_scalar_broadcast2(%scalar: f32) -> vector<1x8xf32> {
2071  %wide = vector.broadcast %scalar : f32 to vector<8x1xf32>
2072  %transposed = vector.transpose %wide, [1, 0] : vector<8x1xf32> to vector<1x8xf32>
2073  return %transposed : vector<1x8xf32>
2074}
2075
2076// -----
2077
2078// CHECK-LABEL: func @transpose_splat_constant
2079//       CHECK:   %[[CST:.+]] = arith.constant dense<5.000000e+00> : vector<8x4xf32>
2080//       CHECK:   return %[[CST]]
2081func.func @transpose_splat_constant() -> vector<8x4xf32> {
2082  %fives = arith.constant dense<5.0> : vector<4x8xf32>
2083  %transposed = vector.transpose %fives, [1, 0] : vector<4x8xf32> to vector<8x4xf32>
2084  return %transposed : vector<8x4xf32>
2085}
2086
2087// CHECK-LABEL:   func @transpose_splat2(
2088// CHECK-SAME:                           %[[VAL_0:.*]]: f32) -> vector<3x4xf32> {
2089// CHECK:           %[[VAL_1:.*]] = vector.splat %[[VAL_0]] : vector<3x4xf32>
2090// CHECK:           return %[[VAL_1]] : vector<3x4xf32>
2091// CHECK:         }
2092func.func @transpose_splat2(%scalar : f32) -> vector<3x4xf32> {
2093  %filled = vector.splat %scalar : vector<4x3xf32>
2094  %transposed = vector.transpose %filled, [1, 0] : vector<4x3xf32> to vector<3x4xf32>
2095  return %transposed : vector<3x4xf32>
2096}
2097
2098// -----
2099
2100// CHECK-LABEL: func.func @insert_1d_constant
2101//   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[9, 1, 2]> : vector<3xi32>
2102//   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[0, 9, 2]> : vector<3xi32>
2103//   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[0, 1, 9]> : vector<3xi32>
2104//  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<3xi32>, vector<3xi32>, vector<3xi32>
2105func.func @insert_1d_constant() -> (vector<3xi32>, vector<3xi32>, vector<3xi32>) {
2106  %dest = arith.constant dense<[0, 1, 2]> : vector<3xi32>
2107  %nine = arith.constant 9 : i32
2108  %at0 = vector.insert %nine, %dest[0] : i32 into vector<3xi32>
2109  %at1 = vector.insert %nine, %dest[1] : i32 into vector<3xi32>
2110  %at2 = vector.insert %nine, %dest[2] : i32 into vector<3xi32>
2111  return %at0, %at1, %at2 : vector<3xi32>, vector<3xi32>, vector<3xi32>
2112}
2113
2114// -----
2115
2116// CHECK-LABEL: func.func @insert_2d_constant
2117//   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[99, 1, 2\], \[3, 4, 5\]\]}}> : vector<2x3xi32>
2118//   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[0, 1, 2\], \[3, 4, 99\]\]}}> : vector<2x3xi32>
2119//   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[90, 91, 92\], \[3, 4, 5\]\]}}> : vector<2x3xi32>
2120//   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[0, 1, 2\], \[90, 91, 92\]\]}}> : vector<2x3xi32>
2121//  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]]
2122func.func @insert_2d_constant() -> (vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>) {
2123  %dest = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
2124  %elem = arith.constant 99 : i32
2125  %row = arith.constant dense<[90, 91, 92]> : vector<3xi32>
2126  %r0 = vector.insert %elem, %dest[0, 0] : i32 into vector<2x3xi32>
2127  %r1 = vector.insert %elem, %dest[1, 2] : i32 into vector<2x3xi32>
2128  %r2 = vector.insert %row, %dest[0] : vector<3xi32> into vector<2x3xi32>
2129  %r3 = vector.insert %row, %dest[1] : vector<3xi32> into vector<2x3xi32>
2130  return %r0, %r1, %r2, %r3 : vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>
2131}
2132
2133// -----
2134
2135// CHECK-LABEL: func.func @insert_2d_splat_constant
2136//   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<2x3xi32>
2137//   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[99, 0, 0\], \[0, 0, 0\]\]}}> : vector<2x3xi32>
2138//   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[0, 0, 0\], \[0, 99, 0\]\]}}> : vector<2x3xi32>
2139//   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[33, 33, 33\], \[0, 0, 0\]\]}}> : vector<2x3xi32>
2140//   CHECK-DAG: %[[ECST:.*]] = arith.constant dense<{{\[\[0, 0, 0\], \[33, 33, 33\]\]}}> : vector<2x3xi32>
2141//  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]]
2142func.func @insert_2d_splat_constant()
2143  -> (vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>) {
2144  %zeros = arith.constant dense<0> : vector<2x3xi32>
2145  %zero = arith.constant 0 : i32
2146  %elem = arith.constant 99 : i32
2147  %row = arith.constant dense<33> : vector<3xi32>
2148  %r0 = vector.insert %zero, %zeros[0, 0] : i32 into vector<2x3xi32>
2149  %r1 = vector.insert %elem, %zeros[0, 0] : i32 into vector<2x3xi32>
2150  %r2 = vector.insert %elem, %zeros[1, 1] : i32 into vector<2x3xi32>
2151  %r3 = vector.insert %row, %zeros[0] : vector<3xi32> into vector<2x3xi32>
2152  %r4 = vector.insert %row, %zeros[1] : vector<3xi32> into vector<2x3xi32>
2153  return %r0, %r1, %r2, %r3, %r4 : vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>
2154}
2155
2156// -----
2157
2158// CHECK-LABEL: func @insert_element_fold
2159//       CHECK:   %[[V:.+]] = arith.constant dense<[0, 1, 7, 3]> : vector<4xi32>
2160//       CHECK:   return %[[V]]
2161func.func @insert_element_fold() -> vector<4xi32> {
2162  %dest = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
2163  %elem = arith.constant 7 : i32
2164  %pos = arith.constant 2 : i32
2165  %updated = vector.insertelement %elem, %dest[%pos : i32] : vector<4xi32>
2166  return %updated : vector<4xi32>
2167}
2168
2169// -----
2170
2171// CHECK-LABEL: func @insert_element_invalid_fold
2172func.func @insert_element_invalid_fold() -> vector<1xf32> {
2173  // Index 26 is out of bounds for the 1-element destination vector.
2174  %pos = arith.constant 26 : index
2175  %elem = arith.constant 1.60215309E+9 : f32
2176  %dest = arith.constant dense<1.60215309E+9> : vector<1xf32>
2177// CHECK: vector.insertelement
2178  %updated = vector.insertelement %elem, %dest[%pos : index] : vector<1xf32>
2179  return %updated : vector<1xf32>
2180}
2181
2182
2183// -----
2184
2185// Poison destination vector: the folder must not crash.
2186// CHECK-LABEL: func @insert_poison_fold1
2187//       CHECK:   vector.insertelement
2188func.func @insert_poison_fold1() -> vector<4xi32> {
2189  %dest = ub.poison : vector<4xi32>
2190  %elem = arith.constant 7 : i32
2191  %pos = arith.constant 2 : i32
2192  %updated = vector.insertelement %elem, %dest[%pos : i32] : vector<4xi32>
2193  return %updated : vector<4xi32>
2194}
2195
2196// -----
2197
2198// Poison inserted scalar: the folder must not crash.
2199// CHECK-LABEL: func @insert_poison_fold2
2200//       CHECK:   vector.insertelement
2201func.func @insert_poison_fold2() -> vector<4xi32> {
2202  %dest = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
2203  %elem = ub.poison : i32
2204  %pos = arith.constant 2 : i32
2205  %updated = vector.insertelement %elem, %dest[%pos : i32] : vector<4xi32>
2206  return %updated : vector<4xi32>
2207}
2208
2209// -----
2210
2211// Poison insertion index: the folder must not crash.
2212// CHECK-LABEL: func @insert_poison_fold3
2213//       CHECK:   vector.insertelement
2214func.func @insert_poison_fold3() -> vector<4xi32> {
2215  %dest = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
2216  %elem = arith.constant 7 : i32
2217  %pos = ub.poison : i32
2218  %updated = vector.insertelement %elem, %dest[%pos : i32] : vector<4xi32>
2219  return %updated : vector<4xi32>
2220}
2221
2222// -----
2223
2224// CHECK-LABEL: func @extract_element_fold
2225//       CHECK:   %[[C:.+]] = arith.constant 5 : i32
2226//       CHECK:   return %[[C]]
2227func.func @extract_element_fold() -> i32 {
2228  %src = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32>
2229  %pos = arith.constant 2 : i32
2230  %elem = vector.extractelement %src[%pos : i32] : vector<4xi32>
2231  return %elem : i32
2232}
2233
2234// CHECK-LABEL: func @extract_element_splat_fold
2235//  CHECK-SAME: (%[[ARG:.+]]: i32)
2236//       CHECK:   return %[[ARG]]
2237func.func @extract_element_splat_fold(%scalar : i32) -> i32 {
2238  %src = vector.splat %scalar : vector<4xi32>
2239  %pos = arith.constant 2 : i32
2240  %elem = vector.extractelement %src[%pos : i32] : vector<4xi32>
2241  return %elem : i32
2242}
2243
2244// -----
2245
2246// Poison source vector: the folder must not crash.
2247// CHECK-LABEL: func @extract_element_poison_fold1
2248//       CHECK:   vector.extractelement
2249func.func @extract_element_poison_fold1() -> i32 {
2250  %src = ub.poison : vector<4xi32>
2251  %pos = arith.constant 2 : i32
2252  %elem = vector.extractelement %src[%pos : i32] : vector<4xi32>
2253  return %elem : i32
2254}
2255
2256// -----
2257
2258// Poison extraction index: the folder must not crash.
2259// CHECK-LABEL: func @extract_element_poison_fold2
2260//       CHECK:   vector.extractelement
2261func.func @extract_element_poison_fold2() -> i32 {
2262  %src = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32>
2263  %pos = ub.poison : i32
2264  %elem = vector.extractelement %src[%pos : i32] : vector<4xi32>
2265  return %elem : i32
2266}
2267
2268// -----
2269
2270// CHECK-LABEL: func @reduce_one_element_vector_extract
2271//  CHECK-SAME: (%[[V:.+]]: vector<1xf32>)
2272//       CHECK:   %[[S:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2273//       CHECK:   return %[[S]] : f32
2274func.func @reduce_one_element_vector_extract(%vec : vector<1xf32>) -> f32 {
2275  %sum = vector.reduction <add>, %vec : vector<1xf32> into f32
2276  return %sum : f32
2277}
2278
2279// -----
2280
2281// CHECK-LABEL: func @masked_reduce_one_element_vector_extract
2282//  CHECK-SAME: %[[VAL_0:.*]]: vector<1xf32>, %[[VAL_1:.*]]: vector<1xi1>)
2283func.func @masked_reduce_one_element_vector_extract(%vec : vector<1xf32>, %pred : vector<1xi1>) -> f32 {
2284//       CHECK:   %[[VAL_2:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<1xf32>
2285  %sum = vector.mask %pred { vector.reduction <add>, %vec : vector<1xf32> into f32 }
2286         : vector<1xi1> -> f32
2287  return %sum : f32
2288}
2289
2290// -----
2291
// A single-element <add> reduction with an accumulator becomes an extract of
// the element followed by arith.addf with the accumulator argument.
2292// CHECK-LABEL: func @reduce_one_element_vector_addf
2293//  CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2294//       CHECK:   %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2295//       CHECK:   %[[S:.+]] = arith.addf %[[A]], %[[B]] : f32
2296//       CHECK:   return %[[S]]
2297func.func @reduce_one_element_vector_addf(%a : vector<1xf32>, %b: f32) -> f32 {
2298  %s = vector.reduction <add>, %a, %b : vector<1xf32> into f32
2299  return %s : f32
2300}
2301
2302// -----
2303
// Same rewrite as the plain addf case; the fastmath flags on the reduction
// must be carried over to the generated arith.addf.
2304// CHECK-LABEL: func @reduce_one_element_vector_addf_fastmath
2305//  CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2306//       CHECK:   %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2307//       CHECK:   %[[S:.+]] = arith.addf %[[A]], %[[B]] fastmath<nnan,ninf> : f32
2308//       CHECK:   return %[[S]]
2309func.func @reduce_one_element_vector_addf_fastmath(%a : vector<1xf32>, %b: f32) -> f32 {
2310  %s = vector.reduction <add>, %a, %b fastmath<nnan,ninf> : vector<1xf32> into f32
2311  return %s : f32
2312}
2313
2314// -----
2315
2316// CHECK-LABEL: func @masked_reduce_one_element_vector_addf
2317//  CHECK-SAME: %[[VAL_0:.*]]: vector<1xf32>, %[[VAL_1:.*]]: f32,
2318//  CHECK-SAME: %[[VAL_2:.*]]: vector<1xi1>)
2319func.func @masked_reduce_one_element_vector_addf(%vec: vector<1xf32>,
2320                                                 %acc: f32,
2321                                                 %pred: vector<1xi1>) -> f32 {
2322//       CHECK:   %[[VAL_3:.*]] = vector.extract %[[VAL_2]][0] : i1 from vector<1xi1>
2323//       CHECK:   %[[VAL_4:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<1xf32>
2324//       CHECK:   %[[VAL_5:.*]] = arith.addf %[[VAL_4]], %[[VAL_1]] : f32
2325//       CHECK:   %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_1]] : f32
2326  %sum = vector.mask %pred { vector.reduction <add>, %vec, %acc : vector<1xf32> into f32 }
2327         : vector<1xi1> -> f32
2328  return %sum : f32
2329}
2330
2331// -----
2332
// A single-element <mul> reduction with an accumulator becomes an extract of
// the element followed by arith.mulf with the accumulator argument.
2333// CHECK-LABEL: func @reduce_one_element_vector_mulf
2334//  CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2335//       CHECK:   %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2336//       CHECK:   %[[S:.+]] = arith.mulf %[[A]], %[[B]] : f32
2337//       CHECK:   return %[[S]]
2338func.func @reduce_one_element_vector_mulf(%a : vector<1xf32>, %b: f32) -> f32 {
2339  %s = vector.reduction <mul>, %a, %b : vector<1xf32> into f32
2340  return %s : f32
2341}
2342
2343// -----
2344
2345// CHECK-LABEL: func @dont_reduce_one_element_vector
2346//       CHECK: vector.reduction
2347func.func @dont_reduce_one_element_vector(%vec : vector<4xf32>) -> f32 {
2348  %sum = vector.reduction <add>, %vec : vector<4xf32> into f32
2349  return %sum : f32
2350}
2351
2352// -----
2353
2354// CHECK-LABEL: func @reduce_one_element_vector_maximumf
2355//  CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2356//       CHECK:   %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2357//       CHECK:   %[[S:.+]] = arith.maximumf %[[A]], %[[B]] : f32
2358//       CHECK:   return %[[S]]
2359func.func @reduce_one_element_vector_maximumf(%vec : vector<1xf32>, %acc: f32) -> f32 {
2360  %max = vector.reduction <maximumf>, %vec, %acc : vector<1xf32> into f32
2361  return %max : f32
2362}
2363
2364// -----
2365
// Two chained bitcasts fold into a single bitcast from the original source
// type to the final result type; the operand must be the function argument.
2366// CHECK-LABEL: func @bitcast(
2367//  CHECK-SAME:               %[[ARG:.*]]: vector<4x8xf32>) -> vector<4x16xi16> {
2368//       CHECK: vector.bitcast %[[ARG]] : vector<4x8xf32> to vector<4x16xi16>
2369func.func @bitcast(%a: vector<4x8xf32>) -> vector<4x16xi16> {
2370  %0 = vector.bitcast %a : vector<4x8xf32> to vector<4x8xi32>
2371  %1 = vector.bitcast %0 : vector<4x8xi32> to vector<4x16xi16>
2372  return %1 : vector<4x16xi16>
2373}
2374
2375// -----
2376
2377// CHECK-LABEL: @insert_strided_slice_splat
2378//  CHECK-SAME: (%[[ARG:.*]]: f32)
2379//  CHECK-NEXT:   %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<8x16xf32>
2380//  CHECK-NEXT:   return %[[SPLAT]] : vector<8x16xf32>
2381func.func @insert_strided_slice_splat(%scalar: f32) -> (vector<8x16xf32>) {
2382  %tile = vector.splat %scalar : vector<4x4xf32>
2383  %dest = vector.splat %scalar : vector<8x16xf32>
2384  %res = vector.insert_strided_slice %tile, %dest {offsets = [2, 2], strides = [1, 1]}
2385    : vector<4x4xf32> into vector<8x16xf32>
2386  return %res : vector<8x16xf32>
2387}
2388
2389
2390// -----
2391
2392// CHECK-LABEL: @insert_extract_strided_slice
2393//  CHECK-SAME: (%[[ARG:.*]]: vector<8x16xf32>)
2394//  CHECK-NEXT:   return %[[ARG]] : vector<8x16xf32>
2395func.func @insert_extract_strided_slice(%full: vector<8x16xf32>) -> (vector<8x16xf32>) {
2396  %tile = vector.extract_strided_slice %full {offsets = [0, 8], sizes = [2, 4], strides = [1, 1]}
2397        : vector<8x16xf32> to vector<2x4xf32>
2398  %res = vector.insert_strided_slice %tile, %full {offsets = [0, 8], strides = [1, 1]}
2399        : vector<2x4xf32> into vector<8x16xf32>
2400  return %res : vector<8x16xf32>
2401}
2402
2403// -----
2404
2405// CHECK-LABEL: func.func @insert_strided_1d_constant
2406//   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[4, 1, 2]> : vector<3xi32>
2407//   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[0, 1, 4]> : vector<3xi32>
2408//   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[5, 6, 2]> : vector<3xi32>
2409//   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<[0, 5, 6]> : vector<3xi32>
2410//   CHECK-DAG: %[[ECST:.*]] = arith.constant dense<[7, 8, 9]> : vector<3xi32>
2411//  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]]
2412func.func @insert_strided_1d_constant() ->
2413  (vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>) {
2414  %dest = arith.constant dense<[0, 1, 2]> : vector<3xi32>
2415  %single = arith.constant dense<4> : vector<1xi32>
2416  %pair = arith.constant dense<[5, 6]> : vector<2xi32>
2417  %triple = arith.constant dense<[7, 8, 9]> : vector<3xi32>
2418  %r0 = vector.insert_strided_slice %single, %dest {offsets = [0], strides = [1]} : vector<1xi32> into vector<3xi32>
2419  %r1 = vector.insert_strided_slice %single, %dest {offsets = [2], strides = [1]} : vector<1xi32> into vector<3xi32>
2420  %r2 = vector.insert_strided_slice %pair, %dest {offsets = [0], strides = [1]} : vector<2xi32> into vector<3xi32>
2421  %r3 = vector.insert_strided_slice %pair, %dest {offsets = [1], strides = [1]} : vector<2xi32> into vector<3xi32>
2422  %r4 = vector.insert_strided_slice %triple, %dest {offsets = [0], strides = [1]} : vector<3xi32> into vector<3xi32>
2423  return %r0, %r1, %r2, %r3, %r4 : vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>
2424}
2425
2426// -----
2427
2428// CHECK-LABEL: func.func @insert_strided_2d_constant
2429//   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[0, 1\], \[9, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
2430//   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[4, 9\]\]}}> : vector<3x2xi32>
2431//   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[18, 19\], \[2, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
2432//   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[18, 19\], \[4, 5\]\]}}> : vector<3x2xi32>
2433//   CHECK-DAG: %[[ECST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[18, 19\]\]}}> : vector<3x2xi32>
2434//   CHECK-DAG: %[[FCST:.*]] = arith.constant dense<{{\[\[28, 29\], \[38, 39\], \[4, 5\]\]}}> : vector<3x2xi32>
2435//   CHECK-DAG: %[[GCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[28, 29\], \[38, 39\]\]}}> : vector<3x2xi32>
2436//  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]], %[[FCST]], %[[GCST]]
2437func.func @insert_strided_2d_constant() ->
2438  (vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>) {
2439  %dest = arith.constant dense<[[0, 1], [2, 3], [4, 5]]> : vector<3x2xi32>
2440  %single = arith.constant dense<9> : vector<1xi32>
2441  %pair = arith.constant dense<[18, 19]> : vector<2xi32>
2442  %tile = arith.constant dense<[[28, 29], [38, 39]]> : vector<2x2xi32>
2443  %r0 = vector.insert_strided_slice %single, %dest {offsets = [1, 0], strides = [1]} : vector<1xi32> into vector<3x2xi32>
2444  %r1 = vector.insert_strided_slice %single, %dest {offsets = [2, 1], strides = [1]} : vector<1xi32> into vector<3x2xi32>
2445  %r2 = vector.insert_strided_slice %pair, %dest {offsets = [0, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
2446  %r3 = vector.insert_strided_slice %pair, %dest {offsets = [1, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
2447  %r4 = vector.insert_strided_slice %pair, %dest {offsets = [2, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
2448  %r5 = vector.insert_strided_slice %tile, %dest {offsets = [0, 0], strides = [1, 1]} : vector<2x2xi32> into vector<3x2xi32>
2449  %r6 = vector.insert_strided_slice %tile, %dest {offsets = [1, 0], strides = [1, 1]} : vector<2x2xi32> into vector<3x2xi32>
2450  return %r0, %r1, %r2, %r3, %r4, %r5, %r6 :
2451    vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>
2452}
2453
2454// -----
2455
2456// CHECK-LABEL: func @shuffle_splat
2457//  CHECK-SAME:   (%[[ARG:.*]]: i32)
2458//  CHECK-NEXT:   %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<4xi32>
2459//  CHECK-NEXT:   return %[[SPLAT]] : vector<4xi32>
2460func.func @shuffle_splat(%scalar : i32) -> vector<4xi32> {
2461  %lhs = vector.splat %scalar : vector<4xi32>
2462  %rhs = vector.splat %scalar : vector<2xi32>
2463  %res = vector.shuffle %lhs, %rhs [2, 3, 4, 5] : vector<4xi32>, vector<2xi32>
2464  return %res : vector<4xi32>
2465}
2466
2467
2468// -----
2469
2470// CHECK-LABEL: func @insert_splat
2471//  CHECK-SAME:   (%[[ARG:.*]]: i32)
2472//  CHECK-NEXT:   %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<2x4x3xi32>
2473//  CHECK-NEXT:   return %[[SPLAT]] : vector<2x4x3xi32>
2474func.func @insert_splat(%scalar : i32) -> vector<2x4x3xi32> {
2475  %slice = vector.splat %scalar : vector<4x3xi32>
2476  %dest = vector.splat %scalar : vector<2x4x3xi32>
2477  %res = vector.insert %slice, %dest[0] : vector<4x3xi32> into vector<2x4x3xi32>
2478  return %res : vector<2x4x3xi32>
2479}
2480
2481// -----
2482
2483// CHECK-LABEL: func.func @transfer_read_from_rank_reducing_extract_slice
2484//       CHECK:   tensor.extract_slice
2485//       CHECK:   vector.transfer_read
2486func.func @transfer_read_from_rank_reducing_extract_slice(%src: tensor<1x8x8x8xf32>, %i1: index, %i2: index, %i3: index, %i4: index) -> vector<4xf32> {
2487  %zero = arith.constant 0 : index
2488  %pad = arith.constant 0.000000e+00 : f32
2489  %slice = tensor.extract_slice %src[0, %i1, %i2, %i3] [1, 4, 1, 4] [1, 1, 1, 1] : tensor<1x8x8x8xf32> to tensor<1x4x4xf32>
2490  %read = vector.transfer_read %slice[%zero, %i4, %zero], %pad {in_bounds = [true]} : tensor<1x4x4xf32>, vector<4xf32>
2491  return %read : vector<4xf32>
2492}
2493
2494// -----
2495
// An extract from a broadcast is rewritten to extract directly from the
// broadcast source. The result is captured instead of hard-coding the
// printed SSA name.
2496// CHECK-LABEL: func.func @extract_from_broadcast
2497func.func @extract_from_broadcast(%src: vector<1x1x1xf32>) -> vector<1xf32> {
2498  %0 = vector.broadcast %src : vector<1x1x1xf32> to vector<1x1x32x1xf32>
2499
2500  //  CHECK-NEXT:   %[[RES:.+]] = vector.extract {{.*}}[0, 0] : vector<1xf32> from vector<1x1x1xf32>
2501  //  CHECK-NEXT:   return %[[RES]] : vector<1xf32>
2502  %1 = vector.extract %0[0, 0, 31] : vector<1xf32> from vector<1x1x32x1xf32>
2503  return %1: vector<1xf32>
2504}
2505
// Extracting from a dimension that the broadcast stretched (1 -> 4) reads
// position 0 of the source along that dimension. The result is captured
// instead of hard-coding the printed SSA name.
2506// CHECK-LABEL: func.func @extract_from_stretch_broadcast
2507func.func @extract_from_stretch_broadcast(%src: vector<3x1x2xf32>) -> f32 {
2508  //  CHECK-NEXT:  %[[RES:.+]] = vector.extract {{.*}}[0, 0, 0] : f32 from vector<3x1x2xf32>
2509  //  CHECK-NEXT:  return %[[RES]] : f32
2510  %0 = vector.broadcast %src : vector<3x1x2xf32> to vector<3x4x2xf32>
2511  %1 = vector.extract %0[0, 2, 0] : f32 from vector<3x4x2xf32>
2512  return %1: f32
2513}
2514
2515// -----
2516// CHECK-LABEL: func.func @extract_strided_slice_of_constant_mask
2517func.func @extract_strided_slice_of_constant_mask() -> vector<5x7xi1>{
2518  //  CHECK-NEXT:   %[[RES:.*]] = vector.constant_mask [5, 4] : vector<5x7xi1>
2519  //  CHECK-NEXT:   return %[[RES]] : vector<5x7xi1>
2520  %cols = arith.constant 4 : index
2521  %rows = arith.constant 10 : index
2522  %full = vector.create_mask %rows, %cols : vector<12x7xi1>
2523  %slice = vector.extract_strided_slice %full {offsets = [3], sizes = [5], strides = [1]} : vector<12x7xi1> to vector<5x7xi1>
2524  return %slice : vector<5x7xi1>
2525}
2526
2527// -----
2528
2529// CHECK-LABEL: func.func @fold_extractelement_of_broadcast(
2530//  CHECK-SAME:     %[[f:.*]]: f32
2531//       CHECK:   return %[[f]]
2532func.func @fold_extractelement_of_broadcast(%scalar: f32) -> f32 {
2533  %wide = vector.broadcast %scalar : f32 to vector<15xf32>
2534  %pos = arith.constant 5 : index
2535  %elem = vector.extractelement %wide [%pos : index] : vector<15xf32>
2536  return %elem : f32
2537}
2538
2539// -----
2540
2541// CHECK-LABEL: func.func @fold_0d_vector_reduction
2542func.func @fold_0d_vector_reduction(%vec: vector<f32>) -> f32 {
2543  // CHECK-NEXT: %[[RES:.*]] = vector.extractelement %arg{{.*}}[] : vector<f32>
2544  // CHECK-NEXT: return %[[RES]] : f32
2545  %sum = vector.reduction <add>, %vec : vector<f32> into f32
2546  return %sum : f32
2547}
2548
2549// -----
2550
2551// CHECK-LABEL: func @empty_vector_mask
2552func.func @empty_vector_mask(%pred : vector<8xi1>) {
2553//   CHECK-NOT:   vector.mask
2554  vector.mask %pred { } : vector<8xi1>
2555  return
2556}
2557
2558// -----
2559
2560// CHECK-LABEL: func @empty_vector_mask_with_return
2561//  CHECK-SAME:     %[[IN:.*]]: vector<8xf32>
2562func.func @empty_vector_mask_with_return(%vec : vector<8xf32>, %pred : vector<8xi1>) -> vector<8xf32> {
2563//   CHECK-NOT:   vector.mask
2564//       CHECK:   return %[[IN]] : vector<8xf32>
2565  %res = vector.mask %pred { vector.yield %vec : vector<8xf32> } : vector<8xi1> -> vector<8xf32>
2566  return %res : vector<8xf32>
2567}
2568
2569// -----
2570
2571// CHECK-LABEL: func @all_true_vector_mask
2572//  CHECK-SAME:     %[[IN:.*]]: tensor<3x4xf32>
2573func.func @all_true_vector_mask(%src : tensor<3x4xf32>) -> vector<3x4xf32> {
2574//   CHECK-NOT:   vector.mask
2575//       CHECK:   %[[LD:.*]] = vector.transfer_read %[[IN]]
2576//       CHECK:   return %[[LD]] : vector<3x4xf32>
2577  %zero = arith.constant 0 : index
2578  %pad = arith.constant 0.0 : f32
2579  %full_mask = vector.constant_mask [3, 4] : vector<3x4xi1>
2580  %read = vector.mask %full_mask { vector.transfer_read %src[%zero, %zero], %pad : tensor<3x4xf32>, vector<3x4xf32> } : vector<3x4xi1> -> vector<3x4xf32>
2581  return %read : vector<3x4xf32>
2582}
2583
2584// -----
2585
2586// CHECK-LABEL: func @all_true_vector_mask_no_result(
2587func.func @all_true_vector_mask_no_result(%vec : vector<3x4xf32>, %dst : memref<3x4xf32>) {
2588//   CHECK-NOT:   vector.mask
2589//       CHECK:   vector.transfer_write
2590  %zero = arith.constant 0 : index
2591  %full_mask = vector.constant_mask [3, 4] : vector<3x4xi1>
2592  vector.mask %full_mask { vector.transfer_write %vec, %dst[%zero, %zero] : vector<3x4xf32>, memref<3x4xf32> } : vector<3x4xi1>
2593  return
2594}
2595
2596// -----
2597
2598// CHECK-LABEL:   func.func @fold_shape_cast_with_mask(
2599// CHECK-SAME:     %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x4xi1> {
2600func.func @fold_shape_cast_with_mask(%src: tensor<1x?xf32>) -> vector<1x4xi1> {
2601// CHECK-NOT: vector.shape_cast
2602// CHECK:     %[[VAL_1:.*]] = arith.constant 1 : index
2603// CHECK:     %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
2604// CHECK:     %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x4xi1>
2605// CHECK:     return %[[VAL_3]] : vector<1x4xi1>
2606  %one = arith.constant 1 : index
2607  %extent = tensor.dim %src, %one : tensor<1x?xf32>
2608  %mask4d = vector.create_mask %one, %extent, %one, %one : vector<1x4x1x1xi1>
2609  %mask2d = vector.shape_cast %mask4d : vector<1x4x1x1xi1> to vector<1x4xi1>
2610  return %mask2d : vector<1x4xi1>
2611}
2612
2613// -----
2614
2615// CHECK-LABEL:   func.func @fold_shape_cast_with_mask_scalable(
2616// CHECK-SAME:    %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x[4]xi1> {
2617func.func @fold_shape_cast_with_mask_scalable(%src: tensor<1x?xf32>) -> vector<1x[4]xi1> {
2618// CHECK-NOT: vector.shape_cast
2619// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
2620// CHECK:           %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
2621// CHECK:           %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x[4]xi1>
2622// CHECK:           return %[[VAL_3]] : vector<1x[4]xi1>
2623  %one = arith.constant 1 : index
2624  %extent = tensor.dim %src, %one : tensor<1x?xf32>
2625  %mask4d = vector.create_mask %one, %extent, %one, %one : vector<1x[4]x1x1xi1>
2626  %mask2d = vector.shape_cast %mask4d : vector<1x[4]x1x1xi1> to vector<1x[4]xi1>
2627  return %mask2d : vector<1x[4]xi1>
2628}
2629
2630// -----
2631
2632// Check that a scalable unit dim (i.e. [1]) is preserved: only the trailing fixed-size unit dim is dropped.
2633// CHECK-LABEL:   func.func @fold_shape_cast_with_mask_scalable_one(
2634// CHECK-SAME:    %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x[1]xi1> {
2635func.func @fold_shape_cast_with_mask_scalable_one(%src: tensor<1x?xf32>) -> vector<1x[1]xi1>{
2636// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
2637// CHECK:           %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
2638// CHECK:           %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x[1]xi1>
2639// CHECK:           return %[[VAL_3]] : vector<1x[1]xi1>
2640  %one = arith.constant 1 : index
2641  %extent = tensor.dim %src, %one : tensor<1x?xf32>
2642  %mask3d = vector.create_mask %one, %extent, %one : vector<1x[1]x1xi1>
2643  %mask2d = vector.shape_cast %mask3d : vector<1x[1]x1xi1> to vector<1x[1]xi1>
2644  return %mask2d : vector<1x[1]xi1>
2645}
2646
2647// -----
2648
2649// CHECK-LABEL:   func.func @fold_shape_cast_with_constant_mask() -> vector<4xi1> {
2650func.func @fold_shape_cast_with_constant_mask() -> vector<4xi1>{
2651// CHECK-NOT: vector.shape_cast
2652// CHECK:           %[[VAL_0:.*]] = vector.constant_mask [1] : vector<4xi1>
2653// CHECK:           return %[[VAL_0]] : vector<4xi1>
2654  %mask3d = vector.constant_mask [1, 1, 1] : vector<4x1x1xi1>
2655  %mask1d = vector.shape_cast %mask3d : vector<4x1x1xi1> to vector<4xi1>
2656  return %mask1d : vector<4xi1>
2657}
2658
2659// -----
2660
2661// TODO: This IR could be canonicalized, but the existing pattern is not smart
2662// enough to handle it. For now, only verify that canonicalization does not crash.
2663
2664// CHECK-LABEL: func.func @load_store_forwarding_rank_mismatch(
2665//       CHECK:   vector.transfer_write
2666//       CHECK:   vector.transfer_read
2667func.func @load_store_forwarding_rank_mismatch(%v0: vector<4x1x1xf32>, %arg0: tensor<4x4x4xf32>) -> (vector<1x100x4x5xf32>) {
2668  %zero = arith.constant 0 : index
2669  %pad = arith.constant 0.0 : f32
2670  // The write names d0 explicitly in its permutation map.
2671  %written = vector.transfer_write %v0, %arg0[%zero, %zero, %zero]
2672      {in_bounds = [true, true, true],
2673      permutation_map = affine_map<(d0, d1, d2) -> (d2, d1, d0)>} :
2674      vector<4x1x1xf32>, tensor<4x4x4xf32>
2675  // The read covers d0 only implicitly (rank-reduction of the unit dim).
2676  %read = vector.transfer_read %written[%zero, %zero, %zero], %pad
2677      {in_bounds = [true, true, true, true],
2678      permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} :
2679      tensor<4x4x4xf32>, vector<1x100x4x5xf32>
2680  return %read : vector<1x100x4x5xf32>
2681}
2682
2683// -----
2684
2685// CHECK-LABEL: func.func @rank_0_shuffle_to_interleave(
2686//  CHECK-SAME:     %[[LHS:.*]]: vector<f64>, %[[RHS:.*]]: vector<f64>)
2687func.func @rank_0_shuffle_to_interleave(%lhs: vector<f64>, %rhs: vector<f64>) -> vector<2xf64> {
2688  // CHECK: %[[ZIP:.*]] = vector.interleave %[[LHS]], %[[RHS]] : vector<f64> -> vector<2xf64>
2689  // CHECK: return %[[ZIP]]
2690  %zipped = vector.shuffle %lhs, %rhs [0, 1] : vector<f64>, vector<f64>
2691  return %zipped : vector<2xf64>
2692}
2693
2694// -----
2695
2696// CHECK-LABEL: func.func @rank_1_shuffle_to_interleave(
2697//  CHECK-SAME:     %[[LHS:.*]]: vector<6xi32>, %[[RHS:.*]]: vector<6xi32>)
2698func.func @rank_1_shuffle_to_interleave(%lhs: vector<6xi32>, %rhs: vector<6xi32>) -> vector<12xi32> {
2699  // CHECK: %[[ZIP:.*]] = vector.interleave %[[LHS]], %[[RHS]] : vector<6xi32> -> vector<12xi32>
2700  // CHECK: return %[[ZIP]]
2701  %zipped = vector.shuffle %lhs, %rhs [0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11] : vector<6xi32>, vector<6xi32>
2702  return %zipped : vector<12xi32>
2703}
2704
2705// -----
2706
2707// CHECK-LABEL: func @extract_from_0d_splat_broadcast_regression(
2708//  CHECK-SAME:     %[[a:.*]]: f32, %[[b:.*]]: vector<f32>, %[[c:.*]]: vector<2xf32>)
2709func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %b: vector<f32>, %c: vector<2xf32>) -> (f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>) {
2710  // Scalar splat to a 0-D vector, then scalar extract.
2711  %splat0d = vector.splat %a : vector<f32>
2712  %s0 = vector.extract %splat0d[] : f32 from vector<f32>
2713
2714  // Scalar broadcast to a 0-D vector, then scalar extract.
2715  %bcast0d = vector.broadcast %a : f32 to vector<f32>
2716  %s1 = vector.extract %bcast0d[] : f32 from vector<f32>
2717
2718  // 0-D vector broadcast to 3-D, then scalar extract.
2719  // CHECK: %[[extract1:.*]] = vector.extractelement %[[b]][] : vector<f32>
2720  %bcast3d_from_0d = vector.broadcast %b : vector<f32> to vector<1x2x4xf32>
2721  %s2 = vector.extract %bcast3d_from_0d[0, 0, 1] : f32 from vector<1x2x4xf32>
2722
2723  // Scalar splat to 2-D, then scalar extract.
2724  %splat2d = vector.splat %a : vector<2x3xf32>
2725  %s3 = vector.extract %splat2d[0, 1] : f32 from vector<2x3xf32>
2726
2727  // Scalar broadcast to 3-D, then scalar extract.
2728  %bcast3d = vector.broadcast %a : f32 to vector<5x6x7xf32>
2729  %s4 = vector.extract %bcast3d[2, 1, 5] : f32 from vector<5x6x7xf32>
2730
2731  // 2-D slice extracted from the 3-D vector broadcast from a scalar.
2732  // CHECK: %[[extract2:.*]] = vector.broadcast %[[a]] : f32 to vector<6x7xf32>
2733  %slice2d = vector.extract %bcast3d[2] : vector<6x7xf32> from vector<5x6x7xf32>
2734
2735  // 1-D slice extracted from the 2-D vector splatted from a scalar.
2736  // CHECK: %[[extract3:.*]] = vector.broadcast %[[a]] : f32 to vector<3xf32>
2737  %slice1d = vector.extract %splat2d[1] : vector<3xf32> from vector<2x3xf32>
2738
2739  // CHECK:   return %[[a]], %[[a]], %[[extract1]], %[[a]], %[[a]], %[[extract2]], %[[extract3]]
2740  return %s0, %s1, %s2, %s3, %s4, %slice2d, %slice1d : f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>
2741}
2742
2743// -----
2744
// Scalar vector.extract ops applied to vector.from_elements results are
// expected to fold to the scalar operand at the extracted position, so the
// function ends up returning only %a and %b. The expected return values for
// the 2-D case correspond to a row-major ordering of the from_elements
// operands (row 0 holds %a, row 1 holds %b).
// CHECK-LABEL: func @extract_scalar_from_from_elements(
//  CHECK-SAME:     %[[a:.*]]: f32, %[[b:.*]]: f32)
func.func @extract_scalar_from_from_elements(%a: f32, %b: f32) -> (f32, f32, f32, f32, f32, f32, f32) {
  // Extract from 0D.
  %0 = vector.from_elements %a : vector<f32>
  %1 = vector.extract %0[] : f32 from vector<f32>

  // Extract from 1D.
  %2 = vector.from_elements %a : vector<1xf32>
  %3 = vector.extract %2[0] : f32 from vector<1xf32>
  %4 = vector.from_elements %a, %b, %a, %a, %b : vector<5xf32>
  // Position 4 is the last operand of %4, i.e. %b.
  %5 = vector.extract %4[4] : f32 from vector<5xf32>

  // Extract from 2D.
  %6 = vector.from_elements %a, %a, %a, %b, %b, %b : vector<2x3xf32>
  %7 = vector.extract %6[0, 0] : f32 from vector<2x3xf32>
  %8 = vector.extract %6[0, 1] : f32 from vector<2x3xf32>
  %9 = vector.extract %6[1, 1] : f32 from vector<2x3xf32>
  %10 = vector.extract %6[1, 2] : f32 from vector<2x3xf32>

  // CHECK: return %[[a]], %[[a]], %[[b]], %[[a]], %[[a]], %[[b]], %[[b]]
  return %1, %3, %5, %7, %8, %9, %10 : f32, f32, f32, f32, f32, f32, f32
}
2768
2769// -----
2770
// Extracting a 1-D row from a 2-D vector.from_elements result. Each extracted
// row here is built from a single repeated scalar (%a for row 0, %b for
// row 1), and the output is expected to contain a vector.splat of that scalar
// for each row.
// CHECK-LABEL: func @extract_1d_from_from_elements(
//  CHECK-SAME:     %[[a:.*]]: f32, %[[b:.*]]: f32)
func.func @extract_1d_from_from_elements(%a: f32, %b: f32) -> (vector<3xf32>, vector<3xf32>) {
  %0 = vector.from_elements %a, %a, %a, %b, %b, %b : vector<2x3xf32>
  // CHECK: %[[splat1:.*]] = vector.splat %[[a]] : vector<3xf32>
  %1 = vector.extract %0[0] : vector<3xf32> from vector<2x3xf32>
  // CHECK: %[[splat2:.*]] = vector.splat %[[b]] : vector<3xf32>
  %2 = vector.extract %0[1] : vector<3xf32> from vector<2x3xf32>
  // CHECK: return %[[splat1]], %[[splat2]]
  return %1, %2 : vector<3xf32>, vector<3xf32>
}
2782
2783// -----
2784
// Extracting a 2-D slice from a 3-D vector.from_elements result. Each slice is
// expected to become a smaller vector.from_elements that takes the matching
// run of four operands: operands 0-3 for slice 0 and operands 4-7 for slice 1.
// Note that the FileCheck variables are named "splat" although they capture
// from_elements results.
// CHECK-LABEL: func @extract_2d_from_from_elements(
//  CHECK-SAME:     %[[a:.*]]: f32, %[[b:.*]]: f32)
func.func @extract_2d_from_from_elements(%a: f32, %b: f32) -> (vector<2x2xf32>, vector<2x2xf32>) {
  %0 = vector.from_elements %a, %a, %a, %b, %b, %b, %b, %a, %b, %a, %a, %b : vector<3x2x2xf32>
  // CHECK: %[[splat1:.*]] = vector.from_elements %[[a]], %[[a]], %[[a]], %[[b]] : vector<2x2xf32>
  %1 = vector.extract %0[0] : vector<2x2xf32> from vector<3x2x2xf32>
  // CHECK: %[[splat2:.*]] = vector.from_elements %[[b]], %[[b]], %[[b]], %[[a]] : vector<2x2xf32>
  %2 = vector.extract %0[1] : vector<2x2xf32> from vector<3x2x2xf32>
  // CHECK: return %[[splat1]], %[[splat2]]
  return %1, %2 : vector<2x2xf32>, vector<2x2xf32>
}
2796
2797// -----
2798
// A vector.from_elements whose operands are all the same value is expected to
// be rewritten into a vector.splat (both the 2-D and the rank-0 case), while a
// from_elements with one differing operand (%b) is expected to stay a
// from_elements.
// CHECK-LABEL: func @from_elements_to_splat(
//  CHECK-SAME:     %[[a:.*]]: f32, %[[b:.*]]: f32)
func.func @from_elements_to_splat(%a: f32, %b: f32) -> (vector<2x3xf32>, vector<2x3xf32>, vector<f32>) {
  // CHECK: %[[splat:.*]] = vector.splat %[[a]] : vector<2x3xf32>
  %0 = vector.from_elements %a, %a, %a, %a, %a, %a : vector<2x3xf32>
  // CHECK: %[[from_el:.*]] = vector.from_elements {{.*}} : vector<2x3xf32>
  %1 = vector.from_elements %a, %a, %a, %a, %b, %a : vector<2x3xf32>
  // CHECK: %[[splat2:.*]] = vector.splat %[[a]] : vector<f32>
  %2 = vector.from_elements %a : vector<f32>
  // CHECK: return %[[splat]], %[[from_el]], %[[splat2]]
  return %0, %1, %2 : vector<2x3xf32>, vector<2x3xf32>, vector<f32>
}
2811
2812// -----
2813
// Regression test: a vector.insert whose destination is produced by
// llvm.mlir.undef. The directives only require that both the llvm.mlir.undef
// and the vector.insert are still present in the output.
// CHECK-LABEL: func @vector_insert_const_regression(
//       CHECK:   llvm.mlir.undef
//       CHECK:   vector.insert
func.func @vector_insert_const_regression(%arg0: i8) -> vector<4xi8> {
  %0 = llvm.mlir.undef : vector<4xi8>
  %1 = vector.insert %arg0, %0 [0] : i8 into vector<4xi8>
  return %1 : vector<4xi8>
}
2822
2823// -----
2824
// Inserting a scalar at position [-1, 0]. The test name refers to -1 as a
// poison index; the output is expected to contain no vector.insert and to
// produce a ub.poison value of the destination vector type instead.
// CHECK-LABEL: @insert_scalar_poison_idx
func.func @insert_scalar_poison_idx(%a: vector<4x5xf32>, %b: f32)
    -> vector<4x5xf32> {
  //  CHECK-NOT: vector.insert
  // CHECK-NEXT: ub.poison : vector<4x5xf32>
  %0 = vector.insert %b, %a[-1, 0] : f32 into vector<4x5xf32>
  return %0 : vector<4x5xf32>
}
2833
2834// -----
2835
// Inserting a 1-D vector at position [-1]. The test name refers to -1 as a
// poison index; the output is expected to contain no vector.insert and to
// produce a ub.poison value of the destination vector type instead.
// CHECK-LABEL: @insert_vector_poison_idx
func.func @insert_vector_poison_idx(%a: vector<4x5xf32>, %b: vector<5xf32>)
    -> vector<4x5xf32> {
  //  CHECK-NOT: vector.insert
  // CHECK-NEXT: ub.poison : vector<4x5xf32>
  %0 = vector.insert %b, %a[-1] : vector<5xf32> into vector<4x5xf32>
  return %0 : vector<4x5xf32>
}
2844
2845// -----
2846
// Inserting a 1-D vector at position [-1, -1], i.e. with more than one -1
// entry (the test name refers to these as poison indices). The output is
// expected to contain no vector.insert and to produce a ub.poison value of the
// destination vector type instead.
// CHECK-LABEL: @insert_multiple_poison_idx
func.func @insert_multiple_poison_idx(%a: vector<4x5x8xf32>, %b: vector<8xf32>)
    -> vector<4x5x8xf32> {
  //  CHECK-NOT: vector.insert
  // CHECK-NEXT: ub.poison : vector<4x5x8xf32>
  %0 = vector.insert %b, %a[-1, -1] : vector<8xf32> into vector<4x5x8xf32>
  return %0 : vector<4x5x8xf32>
}
2855
2856// -----
2857
// A vector.extract_strided_slice at offset zero with unit strides that takes
// size 1 in every dimension except the innermost (which is taken in full, 4 of
// 4), followed by a shape_cast to vector<4xi32>. The pair is expected to
// canonicalize to a single vector.extract at position [0, 0, 0, 0, 0].
// CHECK-LABEL: @contiguous_extract_strided_slices_to_extract
// CHECK:        %[[EXTRACT:.+]] = vector.extract {{.*}}[0, 0, 0, 0, 0] : vector<4xi32> from vector<8x1x2x1x1x4xi32>
// CHECK-NEXT:   return %[[EXTRACT]] :  vector<4xi32>
func.func @contiguous_extract_strided_slices_to_extract(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<4xi32> {
  %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1, 4], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x4xi32>
  %2 = vector.shape_cast %1 : vector<1x1x1x1x1x4xi32> to vector<4xi32>
  return %2 : vector<4xi32>
}
2866
2867// -----
2868
// Same scenario as a full-rank contiguous slice, but the offsets/sizes/strides
// lists have only five entries for a rank-6 source, and the shape_cast keeps a
// leading unit dimension (vector<1x4xi32>). The pair is expected to
// canonicalize to a single vector.extract at position [0, 0, 0, 0].
// CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_shorter_size_list
// CHECK:        %[[EXTRACT:.+]] = vector.extract {{.*}}[0, 0, 0, 0] : vector<1x4xi32> from vector<8x1x2x1x1x4xi32>
// CHECK-NEXT:   return %[[EXTRACT]] :  vector<1x4xi32>
func.func @contiguous_extract_strided_slices_to_extract_shorter_size_list(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<1x4xi32> {
  %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1], strides = [1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x4xi32>
  %2 = vector.shape_cast %1 : vector<1x1x1x1x1x4xi32> to vector<1x4xi32>
  return %2 : vector<1x4xi32>
}
2877
2878// -----
2879
// Negative test: the slice takes all 8 elements of the outermost dimension
// (a non-unit outer size) but only 1 of 2 in dimension 2. The
// vector.extract_strided_slice is expected to remain in the output.
// CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_unit_outer_size
// CHECK-NEXT:   vector.extract_strided_slice
func.func @contiguous_extract_strided_slices_to_extract_failure_non_unit_outer_size(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<8x1x1x1x1x4xi32> {
  %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 1, 1, 1, 1, 4], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<8x1x1x1x1x4xi32>
  return %1 : vector<8x1x1x1x1x4xi32>
}
2886
2887// -----
2888
// Negative test: the slice takes only 2 of the 4 elements of the innermost
// dimension. The vector.extract_strided_slice is expected to remain in the
// output.
// CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_full_size
// CHECK-NEXT:   vector.extract_strided_slice
func.func @contiguous_extract_strided_slices_to_extract_failure_non_full_size(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<1x1x1x1x1x2xi32> {
  %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1, 2], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x2xi32>
  return %1 : vector<1x1x1x1x1x2xi32>
}
2895
2896// -----
2897
// Negative test: dimension 2 is taken in full (2 of 2) while the innermost
// dimension is only partially taken (1 of 4). The vector.extract_strided_slice
// is expected to remain in the output.
// CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_full_inner_size
// CHECK-NEXT:    vector.extract_strided_slice
func.func @contiguous_extract_strided_slices_to_extract_failure_non_full_inner_size(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<1x1x2x1x1x1xi32> {
  %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 2, 1, 1, 1], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x2x1x1x1xi32>
  return %1 : vector<1x1x2x1x1x1xi32>
}
2904
2905// -----
2906
// A 1-D vector.gather whose index vector is the constant sequence 0, 1, ..., 15
// is expected to be rewritten into a vector.maskedload at the same base index,
// keeping the original mask and pass-through operands.
// CHECK-LABEL: @contiguous_gather
//  CHECK-SAME:   (%[[BASE:.*]]: memref<?xf32>, %[[MASK:.*]]: vector<16xi1>, %[[PASSTHRU:.*]]: vector<16xf32>)
//       CHECK:   %[[C0:.*]] = arith.constant 0 : index
//       CHECK:   %[[R:.*]] = vector.maskedload %[[BASE]][%[[C0]]], %[[MASK]], %[[PASSTHRU]] : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
//       CHECK:   return %[[R]]
func.func @contiguous_gather(%base: memref<?xf32>,
                             %mask: vector<16xi1>, %passthru: vector<16xf32>) -> vector<16xf32> {
  %c0 = arith.constant 0 : index
  %indices = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
  %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
    memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  return %1 : vector<16xf32>
}
2920
2921// -----
2922
// Negative test: the index vector is consecutive but starts at 1 instead of 0
// (1, 2, ..., 16). The vector.gather is expected to remain in the output.
// CHECK-LABEL: @contiguous_gather_non_zero_start(
//  TODO: Non-zero start is not supported yet.
//       CHECK:   %[[R:.*]] = vector.gather
//       CHECK:   return %[[R]]
func.func @contiguous_gather_non_zero_start(%base: memref<?xf32>,
                                            %mask: vector<16xi1>,
                                            %passthru: vector<16xf32>) -> vector<16xf32> {
  %c0 = arith.constant 0 : index
  %indices = arith.constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : vector<16xi32>
  %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
    memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  return %1 : vector<16xf32>
}
2936
2937// -----
2938
// Negative test: a 2-D vector.gather (vector<4x4xi32> indices holding
// 0, 1, ..., 15) on a 2-D memref. The vector.gather is expected to remain in
// the output.
// CHECK-LABEL: @contiguous_gather_2d(
// TODO: Only 1D vectors are supported.
//       CHECK:   %[[R:.*]] = vector.gather
//       CHECK:   return %[[R]]
func.func @contiguous_gather_2d(%base: memref<?x?xf32>,
                                %mask: vector<4x4xi1>, %passthru: vector<4x4xf32>) -> vector<4x4xf32> {
  %c0 = arith.constant 0 : index
  %indices = arith.constant dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : vector<4x4xi32>
  %1 = vector.gather %base[%c0, %c0][%indices], %mask, %passthru :
    memref<?x?xf32>, vector<4x4xi32>, vector<4x4xi1>, vector<4x4xf32> into vector<4x4xf32>
  return %1 : vector<4x4xf32>
}
2951
2952// -----
2953
// A 1-D vector.gather with the constant index sequence 0, 1, ..., 15 and a
// constant all-true mask is expected to be rewritten into a plain vector.load.
// The expected load takes neither a mask nor the pass-through operand.
// CHECK-LABEL: @contiguous_gather_const_mask
//  CHECK-SAME:   (%[[BASE:.*]]: memref<?xf32>, %[[PASSTHRU:.*]]: vector<16xf32>)
//       CHECK:   %[[C0:.*]] = arith.constant 0 : index
//       CHECK:   %[[R:.*]] = vector.load %[[BASE]][%[[C0]]] : memref<?xf32>, vector<16xf32>
//       CHECK:   return %[[R]]
func.func @contiguous_gather_const_mask(%base: memref<?xf32>,
                                        %passthru: vector<16xf32>) -> vector<16xf32> {
  %c0 = arith.constant 0 : index
  %indices = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
  %mask = arith.constant dense<true> : vector<16xi1>
  %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
    memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  return %1 : vector<16xf32>
}
2968
2969// -----
2970
// Same as the constant-index contiguous gather, but the index vector is
// produced by vector.step (vector<16xindex>) rather than a dense constant.
// The gather is expected to be rewritten into a vector.maskedload.
// CHECK-LABEL: @contiguous_gather_step
//  CHECK-SAME:   (%[[BASE:.*]]: memref<?xf32>, %[[MASK:.*]]: vector<16xi1>, %[[PASSTHRU:.*]]: vector<16xf32>)
//       CHECK:   %[[C0:.*]] = arith.constant 0 : index
//       CHECK:   %[[R:.*]] = vector.maskedload %[[BASE]][%[[C0]]], %[[MASK]], %[[PASSTHRU]] : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
//       CHECK:   return %[[R]]
func.func @contiguous_gather_step(%base: memref<?xf32>,
                                  %mask: vector<16xi1>, %passthru: vector<16xf32>) -> vector<16xf32> {
  %c0 = arith.constant 0 : index
  %indices = vector.step : vector<16xindex>
  %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
    memref<?xf32>, vector<16xindex>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  return %1 : vector<16xf32>
}
2984
2985// -----
2986
// Negative test: every lane of the index vector is 0 (dense<0>), so all lanes
// read the same element. The vector.gather is expected to remain in the
// output.
// CHECK-LABEL: @gather_broadcast(
// TODO: Broadcast is not supported yet
//       CHECK:   %[[R:.*]] = vector.gather
//       CHECK:   return %[[R]]
func.func @gather_broadcast(%base: memref<?xf32>,
                            %mask: vector<16xi1>, %passthru: vector<16xf32>) -> vector<16xf32> {
  %c0 = arith.constant 0 : index
  %indices = arith.constant dense<0> : vector<16xi32>
  %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
    memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  return %1 : vector<16xf32>
}
2999
3000// -----
3001
// A 1-D vector.scatter whose index vector is the constant sequence
// 0, 1, ..., 15 is expected to be rewritten into a vector.maskedstore at the
// same base index, keeping the original mask and stored value.
// CHECK-LABEL: @contiguous_scatter
//  CHECK-SAME:   (%[[BASE:.*]]: memref<?xf32>, %[[MASK:.*]]: vector<16xi1>, %[[VALUE:.*]]: vector<16xf32>)
//       CHECK:   %[[C0:.*]] = arith.constant 0 : index
//       CHECK:   vector.maskedstore %[[BASE]][%[[C0]]], %[[MASK]], %[[VALUE]] : memref<?xf32>, vector<16xi1>, vector<16xf32>
func.func @contiguous_scatter(%base: memref<?xf32>,
                              %mask: vector<16xi1>, %value: vector<16xf32>) {
  %c0 = arith.constant 0 : index
  %indices = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
  vector.scatter %base[%c0][%indices], %mask, %value :
    memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
  return
}
3014
3015// -----
3016
// A 1-D vector.scatter with the constant index sequence 0, 1, ..., 15 and a
// mask built by vector.constant_mask [16] on a 16-lane vector is expected to
// be rewritten into a plain vector.store.
// CHECK-LABEL: @contiguous_scatter_const_mask
//  CHECK-SAME:   (%[[BASE:.*]]: memref<?xf32>, %[[VALUE:.*]]: vector<16xf32>)
//       CHECK:   %[[C0:.*]] = arith.constant 0 : index
//       CHECK:   vector.store %[[VALUE]], %[[BASE]][%[[C0]]] : memref<?xf32>, vector<16xf32>
func.func @contiguous_scatter_const_mask(%base: memref<?xf32>,
                                         %value: vector<16xf32>) {
  %c0 = arith.constant 0 : index
  %indices = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
  %mask = vector.constant_mask [16] : vector<16xi1>
  vector.scatter %base[%c0][%indices], %mask, %value :
    memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
  return
}
3030
3031// -----
3032
// Same as the constant-index contiguous scatter, but the index vector is
// produced by vector.step (vector<16xindex>) rather than a dense constant.
// The scatter is expected to be rewritten into a vector.maskedstore.
// CHECK-LABEL: @contiguous_scatter_step
//  CHECK-SAME:   (%[[BASE:.*]]: memref<?xf32>, %[[MASK:.*]]: vector<16xi1>, %[[VALUE:.*]]: vector<16xf32>)
//       CHECK:   %[[C0:.*]] = arith.constant 0 : index
//       CHECK:   vector.maskedstore %[[BASE]][%[[C0]]], %[[MASK]], %[[VALUE]] : memref<?xf32>, vector<16xi1>, vector<16xf32>
func.func @contiguous_scatter_step(%base: memref<?xf32>,
                                   %mask: vector<16xi1>, %value: vector<16xf32>) {
  %c0 = arith.constant 0 : index
  %indices = vector.step : vector<16xindex>
  vector.scatter %base[%c0][%indices], %mask, %value :
    memref<?xf32>, vector<16xindex>, vector<16xi1>, vector<16xf32>
  return
}
3045