xref: /llvm-project/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir (revision 35df525fd00c2037ef144189ee818b7d612241ff)
1// RUN: mlir-opt %s -convert-vector-to-llvm -split-input-file | FileCheck %s
2
3//===----------------------------------------------------------------------===//
4// vector.bitcast
5//===----------------------------------------------------------------------===//
6
// Test input: vector.bitcast on a 0-D vector (vector<f32> -> vector<i32>).
// The expected output below casts the 0-D operand to vector<1xf32>, applies a
// single llvm.bitcast, and casts the vector<1xi32> result back to vector<i32>.
7func.func @bitcast_f32_to_i32_vector_0d(%arg0: vector<f32>) -> vector<i32> {
8  %0 = vector.bitcast %arg0 : vector<f32> to vector<i32>
9  return %0 : vector<i32>
10}
11
12// CHECK-LABEL: @bitcast_f32_to_i32_vector_0d
13// CHECK-SAME:  %[[ARG_0:.*]]: vector<f32>
14// CHECK:       %[[VEC_F32_1D:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector<f32> to vector<1xf32>
15// CHECK:       %[[VEC_I32_1D:.*]] = llvm.bitcast %[[VEC_F32_1D]] : vector<1xf32> to vector<1xi32>
16// CHECK:       %[[VEC_I32_0D:.*]] = builtin.unrealized_conversion_cast %[[VEC_I32_1D]] : vector<1xi32> to vector<i32>
17// CHECK:       return %[[VEC_I32_0D]] : vector<i32>
18
19// -----
20
// Test input: vector.bitcast between fixed-size 1-D vectors with the same
// element count (vector<16xf32> -> vector<16xi32>). The expected output below
// is a single llvm.bitcast applied directly to the argument.
21func.func @bitcast_f32_to_i32_vector(%arg0: vector<16xf32>) -> vector<16xi32> {
22  %0 = vector.bitcast %arg0 : vector<16xf32> to vector<16xi32>
23  return %0 : vector<16xi32>
24}
25
26
27// CHECK-LABEL: @bitcast_f32_to_i32_vector
28// CHECK-SAME:  %[[ARG_0:.*]]: vector<16xf32>
29// CHECK:       llvm.bitcast %[[ARG_0]] : vector<16xf32> to vector<16xi32>
30
31// -----
32
// Scalable variant of the f32 -> i32 bitcast test
// (vector<[16]xf32> -> vector<[16]xi32>). The expected output below is a
// single llvm.bitcast on the scalable vector types.
33func.func @bitcast_f32_to_i32_vector_scalable(%arg0: vector<[16]xf32>) -> vector<[16]xi32> {
34  %0 = vector.bitcast %arg0 : vector<[16]xf32> to vector<[16]xi32>
35  return %0 : vector<[16]xi32>
36}
37
38// CHECK-LABEL: @bitcast_f32_to_i32_vector_scalable
39// CHECK-SAME:  %[[ARG_0:.*]]: vector<[16]xf32>
40// CHECK:       llvm.bitcast %[[ARG_0]] : vector<[16]xf32> to vector<[16]xi32>
41
42// -----
43
// Test input: vector.bitcast that changes the element count
// (vector<64xi8> -> vector<16xf32>). The expected output below is a single
// llvm.bitcast between those two types.
44func.func @bitcast_i8_to_f32_vector(%arg0: vector<64xi8>) -> vector<16xf32> {
45  %0 = vector.bitcast %arg0 : vector<64xi8> to vector<16xf32>
46  return %0 : vector<16xf32>
47}
48
49// CHECK-LABEL: @bitcast_i8_to_f32_vector
50// CHECK-SAME:  %[[ARG_0:.*]]: vector<64xi8>
51// CHECK:       llvm.bitcast %[[ARG_0]] : vector<64xi8> to vector<16xf32>
52
53// -----
54
// Scalable variant of the i8 -> f32 bitcast test
// (vector<[64]xi8> -> vector<[16]xf32>). The expected output below is a
// single llvm.bitcast on the scalable vector types.
55func.func @bitcast_i8_to_f32_vector_scalable(%arg0: vector<[64]xi8>) -> vector<[16]xf32> {
56  %0 = vector.bitcast %arg0 : vector<[64]xi8> to vector<[16]xf32>
57  return %0 : vector<[16]xf32>
58}
59
60// CHECK-LABEL: @bitcast_i8_to_f32_vector_scalable
61// CHECK-SAME:  %[[ARG_0:.*]]: vector<[64]xi8>
62// CHECK:       llvm.bitcast %[[ARG_0]] : vector<[64]xi8> to vector<[16]xf32>
63
64// -----
65
// Test input: vector.bitcast from an index-element vector
// (vector<16xindex> -> vector<128xi8>). The expected output below first casts
// the operand to vector<16xi64> and then applies llvm.bitcast to vector<128xi8>.
66func.func @bitcast_index_to_i8_vector(%arg0: vector<16xindex>) -> vector<128xi8> {
67  %0 = vector.bitcast %arg0 : vector<16xindex> to vector<128xi8>
68  return %0 : vector<128xi8>
69}
70
71// CHECK-LABEL: @bitcast_index_to_i8_vector
72// CHECK-SAME:  %[[ARG_0:.*]]: vector<16xindex>
73// CHECK:       %[[T0:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector<16xindex> to vector<16xi64>
74// CHECK:       llvm.bitcast %[[T0]] : vector<16xi64> to vector<128xi8>
75
76// -----
77
// Scalable variant of the index -> i8 bitcast test
// (vector<[16]xindex> -> vector<[128]xi8>). The expected output below casts
// the operand to vector<[16]xi64> before the llvm.bitcast.
78func.func @bitcast_index_to_i8_vector_scalable(%arg0: vector<[16]xindex>) -> vector<[128]xi8> {
79  %0 = vector.bitcast %arg0 : vector<[16]xindex> to vector<[128]xi8>
80  return %0 : vector<[128]xi8>
81}
82
83// CHECK-LABEL: @bitcast_index_to_i8_vector_scalable
84// CHECK-SAME:  %[[ARG_0:.*]]: vector<[16]xindex>
85// CHECK:       %[[T0:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector<[16]xindex> to vector<[16]xi64>
86// CHECK:       llvm.bitcast %[[T0]] : vector<[16]xi64> to vector<[128]xi8>
87
88// -----
89
90// CHECK-LABEL:   func.func @bitcast_2d(
91// CHECK-SAME:      %[[ARG_0:.*]]: vector<2x4xi32>) -> vector<2x2xi64> {
92// CHECK:           %[[T0:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector<2x4xi32> to !llvm.array<2 x vector<4xi32>>
93// CHECK:           %[[VEC_1:.*]] = llvm.extractvalue %[[T0]][0] : !llvm.array<2 x vector<4xi32>>
94// CHECK:           %[[BCAST_1:.*]] = llvm.bitcast %[[VEC_1]] : vector<4xi32> to vector<2xi64>
95// CHECK:           %[[OUT_1:.*]] = llvm.insertvalue %[[BCAST_1]], {{.*}}[0] : !llvm.array<2 x vector<2xi64>>
96// CHECK:           %[[VEC_2:.*]] = llvm.extractvalue %[[T0]][1] : !llvm.array<2 x vector<4xi32>>
97// CHECK:           %[[BCAST_2:.*]] = llvm.bitcast %[[VEC_2]] : vector<4xi32> to vector<2xi64>
98// CHECK:           %[[OUT_2:.*]] = llvm.insertvalue %[[BCAST_2]], %[[OUT_1]][1] : !llvm.array<2 x vector<2xi64>>
// Test input: vector.bitcast on a 2-D vector
// (vector<2x4xi32> -> vector<2x2xi64>). The expected output above handles each
// of the two rows separately: llvm.extractvalue, llvm.bitcast of
// vector<4xi32> to vector<2xi64>, then llvm.insertvalue into the result array.
99func.func @bitcast_2d(%arg0: vector<2x4xi32>) -> vector<2x2xi64> {
100  %0 = vector.bitcast %arg0 : vector<2x4xi32> to vector<2x2xi64>
101  return %0 : vector<2x2xi64>
102}
103
104// -----
105
106// CHECK-LABEL:   func.func @bitcast_2d_scalable(
107// CHECK-SAME:      %[[ARG_0:.*]]: vector<2x[4]xi32>) -> vector<2x[2]xi64> {
108// CHECK:           %[[T0:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector<2x[4]xi32> to !llvm.array<2 x vector<[4]xi32>>
109// CHECK:           %[[VEC_1:.*]] = llvm.extractvalue %[[T0]][0] : !llvm.array<2 x vector<[4]xi32>>
110// CHECK:           %[[BCAST_1:.*]] = llvm.bitcast %[[VEC_1]] : vector<[4]xi32> to vector<[2]xi64>
111// CHECK:           %[[OUT_1:.*]] = llvm.insertvalue %[[BCAST_1]], {{.*}}[0] : !llvm.array<2 x vector<[2]xi64>>
112// CHECK:           %[[VEC_2:.*]] = llvm.extractvalue %[[T0]][1] : !llvm.array<2 x vector<[4]xi32>>
113// CHECK:           %[[BCAST_2:.*]] = llvm.bitcast %[[VEC_2]] : vector<[4]xi32> to vector<[2]xi64>
114// CHECK:           %[[OUT_2:.*]] = llvm.insertvalue %[[BCAST_2]], %[[OUT_1]][1] : !llvm.array<2 x vector<[2]xi64>>
// Scalable variant of the 2-D bitcast test
// (vector<2x[4]xi32> -> vector<2x[2]xi64>). The expected output above applies
// extractvalue / llvm.bitcast / insertvalue to each of the two scalable rows.
115func.func @bitcast_2d_scalable(%arg0: vector<2x[4]xi32>) -> vector<2x[2]xi64> {
116  %0 = vector.bitcast %arg0 : vector<2x[4]xi32> to vector<2x[2]xi64>
117  return %0 : vector<2x[2]xi64>
118}
119
120// -----
121
122//===----------------------------------------------------------------------===//
123// vector.broadcast
124//===----------------------------------------------------------------------===//
125
// Test input: vector.broadcast of an f32 scalar to a 0-D vector<f32>.
// The expected output below inserts the scalar with llvm.insertelement and
// casts the resulting vector<1xf32> to vector<f32>.
126func.func @broadcast_vec0d_from_f32(%arg0: f32) -> vector<f32> {
127  %0 = vector.broadcast %arg0 : f32 to vector<f32>
128  return %0 : vector<f32>
129}
130// CHECK-LABEL: @broadcast_vec0d_from_f32
131// CHECK-SAME:  %[[A:.*]]: f32)
132// CHECK:       %[[T0:.*]] = llvm.insertelement %[[A]]
133// CHECK:       %[[T1:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<1xf32> to vector<f32>
134// CHECK:       return %[[T1]] : vector<f32>
135
136// -----
137
// Test input: vector.broadcast whose source and result are the same 0-D type
// (vector<f32>). The expected output below returns the argument unchanged.
138func.func @broadcast_vec0d_from_vec0d(%arg0: vector<f32>) -> vector<f32> {
139  %0 = vector.broadcast %arg0 : vector<f32> to vector<f32>
140  return %0 : vector<f32>
141}
142// CHECK-LABEL: @broadcast_vec0d_from_vec0d(
143// CHECK-SAME:  %[[A:.*]]: vector<f32>)
144// CHECK:       return %[[A]] : vector<f32>
145
146// -----
147
// Test input: vector.broadcast of an f32 scalar to vector<2xf32>.
// The expected output below is llvm.insertelement followed by
// llvm.shufflevector.
148func.func @broadcast_vec1d_from_f32(%arg0: f32) -> vector<2xf32> {
149  %0 = vector.broadcast %arg0 : f32 to vector<2xf32>
150  return %0 : vector<2xf32>
151}
152// CHECK-LABEL: @broadcast_vec1d_from_f32
153// CHECK-SAME:  %[[A:.*]]: f32)
154// CHECK:       %[[T0:.*]] = llvm.insertelement %[[A]]
155// CHECK:       %[[T1:.*]] = llvm.shufflevector %[[T0]]
156// CHECK:       return %[[T1]] : vector<2xf32>
157
158// -----
159
// Scalable variant: vector.broadcast of an f32 scalar to vector<[2]xf32>.
// The expected output below is llvm.insertelement followed by
// llvm.shufflevector.
160func.func @broadcast_vec1d_from_f32_scalable(%arg0: f32) -> vector<[2]xf32> {
161  %0 = vector.broadcast %arg0 : f32 to vector<[2]xf32>
162  return %0 : vector<[2]xf32>
163}
164// CHECK-LABEL: @broadcast_vec1d_from_f32_scalable
165// CHECK-SAME:  %[[A:.*]]: f32)
166// CHECK:       %[[T0:.*]] = llvm.insertelement %[[A]]
167// CHECK:       %[[T1:.*]] = llvm.shufflevector %[[T0]]
168// CHECK:       return %[[T1]] : vector<[2]xf32>
169
170// -----
171
// Test input: vector.broadcast of an index scalar to vector<2xindex>.
// The expected output below casts the scalar to i64, splats it with
// insertelement + shufflevector, and casts vector<2xi64> back to
// vector<2xindex>.
172func.func @broadcast_vec1d_from_index(%arg0: index) -> vector<2xindex> {
173  %0 = vector.broadcast %arg0 : index to vector<2xindex>
174  return %0 : vector<2xindex>
175}
176// CHECK-LABEL: @broadcast_vec1d_from_index
177// CHECK-SAME:  %[[A:.*]]: index)
178// CHECK:       %[[A1:.*]] = builtin.unrealized_conversion_cast %[[A]] : index to i64
179// CHECK:       %[[T0:.*]] = llvm.insertelement %[[A1]]
180// CHECK:       %[[T1:.*]] = llvm.shufflevector %[[T0]]
181// CHECK:       %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<2xi64> to vector<2xindex>
182// CHECK:       return %[[T2]] : vector<2xindex>
183
184// -----
185
// Scalable variant: vector.broadcast of an index scalar to vector<[2]xindex>.
// The expected output below casts the scalar to i64, splats it with
// insertelement + shufflevector, and casts vector<[2]xi64> back to
// vector<[2]xindex>.
186func.func @broadcast_vec1d_from_index_scalable(%arg0: index) -> vector<[2]xindex> {
187  %0 = vector.broadcast %arg0 : index to vector<[2]xindex>
188  return %0 : vector<[2]xindex>
189}
190// CHECK-LABEL: @broadcast_vec1d_from_index_scalable
191// CHECK-SAME:  %[[A:.*]]: index)
192// CHECK:       %[[A1:.*]] = builtin.unrealized_conversion_cast %[[A]] : index to i64
193// CHECK:       %[[T0:.*]] = llvm.insertelement %[[A1]]
194// CHECK:       %[[T1:.*]] = llvm.shufflevector %[[T0]]
195// CHECK:       %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<[2]xi64> to vector<[2]xindex>
196// CHECK:       return %[[T2]] : vector<[2]xindex>
197
198// -----
199
// Test input: vector.broadcast of an f32 scalar to vector<2x3xf32>.
// The expected output below builds one splat (insertelement + shufflevector)
// and inserts it at positions 0 and 1 of !llvm.array<2 x vector<3xf32>>.
200func.func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> {
201  %0 = vector.broadcast %arg0 : f32 to vector<2x3xf32>
202  return %0 : vector<2x3xf32>
203}
204// CHECK-LABEL: @broadcast_vec2d_from_scalar(
205// CHECK-SAME:  %[[A:.*]]: f32)
206// CHECK:       %[[T0:.*]] = llvm.insertelement %[[A]]
207// CHECK:       %[[T1:.*]] = llvm.shufflevector %[[T0]]
208// CHECK:       %[[T2:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[0] : !llvm.array<2 x vector<3xf32>>
209// CHECK:       %[[T3:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[1] : !llvm.array<2 x vector<3xf32>>
210// CHECK:       %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T3]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32>
211// CHECK:       return %[[T4]] : vector<2x3xf32>
212
213// -----
214
// Scalable variant: vector.broadcast of an f32 scalar to vector<2x[3]xf32>.
// The expected output below builds one splat and inserts it at positions 0
// and 1 of !llvm.array<2 x vector<[3]xf32>>.
215func.func @broadcast_vec2d_from_scalar_scalable(%arg0: f32) -> vector<2x[3]xf32> {
216  %0 = vector.broadcast %arg0 : f32 to vector<2x[3]xf32>
217  return %0 : vector<2x[3]xf32>
218}
219// CHECK-LABEL: @broadcast_vec2d_from_scalar_scalable(
220// CHECK-SAME:  %[[A:.*]]: f32)
221// CHECK:       %[[T0:.*]] = llvm.insertelement %[[A]]
222// CHECK:       %[[T1:.*]] = llvm.shufflevector %[[T0]]
223// CHECK:       %[[T2:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[0] : !llvm.array<2 x vector<[3]xf32>>
224// CHECK:       %[[T3:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[1] : !llvm.array<2 x vector<[3]xf32>>
225// CHECK:       %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T3]] : !llvm.array<2 x vector<[3]xf32>> to vector<2x[3]xf32>
226// CHECK:       return %[[T4]] : vector<2x[3]xf32>
227
228// -----
229
// Test input: vector.broadcast of an f32 scalar to vector<2x3x4xf32>.
// The expected output below builds one splat and matches the insertvalue ops
// at positions [0, 0] and [1, 2] of the nested !llvm.array result.
230func.func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> {
231  %0 = vector.broadcast %arg0 : f32 to vector<2x3x4xf32>
232  return %0 : vector<2x3x4xf32>
233}
234// CHECK-LABEL: @broadcast_vec3d_from_scalar(
235// CHECK-SAME:  %[[A:.*]]: f32)
236// CHECK:       %[[T0:.*]] = llvm.insertelement %[[A]]
237// CHECK:       %[[T1:.*]] = llvm.shufflevector %[[T0]]
238// CHECK:       %[[T2:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[0, 0] : !llvm.array<2 x array<3 x vector<4xf32>>>
239// ... (the insertvalue ops between positions [0, 0] and [1, 2] are not matched)
240// CHECK:       %[[T3:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[1, 2] : !llvm.array<2 x array<3 x vector<4xf32>>>
241// CHECK:       %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T3]] : !llvm.array<2 x array<3 x vector<4xf32>>> to vector<2x3x4xf32>
242// CHECK:       return %[[T4]] : vector<2x3x4xf32>
243
244// -----
245
// Scalable variant: vector.broadcast of an f32 scalar to vector<2x3x[4]xf32>.
// The expected output below builds one splat and matches the insertvalue ops
// at positions [0, 0] and [1, 2] of the nested !llvm.array result.
246func.func @broadcast_vec3d_from_scalar_scalable(%arg0: f32) -> vector<2x3x[4]xf32> {
247  %0 = vector.broadcast %arg0 : f32 to vector<2x3x[4]xf32>
248  return %0 : vector<2x3x[4]xf32>
249}
250// CHECK-LABEL: @broadcast_vec3d_from_scalar_scalable(
251// CHECK-SAME:  %[[A:.*]]: f32)
252// CHECK:       %[[T0:.*]] = llvm.insertelement %[[A]]
253// CHECK:       %[[T1:.*]] = llvm.shufflevector %[[T0]]
254// CHECK:       %[[T2:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[0, 0] : !llvm.array<2 x array<3 x vector<[4]xf32>>>
255// ... (the insertvalue ops between positions [0, 0] and [1, 2] are not matched)
256// CHECK:       %[[T3:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[1, 2] : !llvm.array<2 x array<3 x vector<[4]xf32>>>
257// CHECK:       %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T3]] : !llvm.array<2 x array<3 x vector<[4]xf32>>> to vector<2x3x[4]xf32>
258// CHECK:       return %[[T4]] : vector<2x3x[4]xf32>
259
260// -----
261
// Test input: vector.broadcast whose source and result are the same 1-D type
// (vector<2xf32>). The expected output below returns the argument unchanged.
262func.func @broadcast_vec1d_from_vec1d(%arg0: vector<2xf32>) -> vector<2xf32> {
263  %0 = vector.broadcast %arg0 : vector<2xf32> to vector<2xf32>
264  return %0 : vector<2xf32>
265}
266// CHECK-LABEL: @broadcast_vec1d_from_vec1d(
267// CHECK-SAME:  %[[A:.*]]: vector<2xf32>)
268// CHECK:       return %[[A]] : vector<2xf32>
269
270// -----
271
// Scalable variant: vector.broadcast whose source and result are both
// vector<[2]xf32>. The expected output below returns the argument unchanged.
272func.func @broadcast_vec1d_from_vec1d_scalable(%arg0: vector<[2]xf32>) -> vector<[2]xf32> {
273  %0 = vector.broadcast %arg0 : vector<[2]xf32> to vector<[2]xf32>
274  return %0 : vector<[2]xf32>
275}
276// CHECK-LABEL: @broadcast_vec1d_from_vec1d_scalable(
277// CHECK-SAME:  %[[A:.*]]: vector<[2]xf32>)
278// CHECK:       return %[[A]] : vector<[2]xf32>
279
280// -----
281
// Test input: vector.broadcast of a 0-D vector<f32> to vector<3x2xf32>.
// The expected output below extracts the single element from the
// vector<1xf32> form of the operand, splats it to a row, and inserts that row
// at positions 0, 1 and 2 of !llvm.array<3 x vector<2xf32>>.
282func.func @broadcast_vec2d_from_vec0d(%arg0: vector<f32>) -> vector<3x2xf32> {
283  %0 = vector.broadcast %arg0 : vector<f32> to vector<3x2xf32>
284  return %0 : vector<3x2xf32>
285}
286// CHECK-LABEL: @broadcast_vec2d_from_vec0d(
287// CHECK-SAME:  %[[A:.*]]: vector<f32>)
288//       CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<f32> to vector<1xf32>
289//       CHECK: %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32>
290//       CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
291//       CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : index) : i64
292//       CHECK: %[[T5:.*]] = llvm.extractelement %[[T0]][%[[T4]] : i64] : vector<1xf32>
293//       CHECK: %[[T6Insert:.*]] = llvm.insertelement %[[T5]]
294//       CHECK: %[[T6:.*]] = llvm.shufflevector %[[T6Insert]]
295//       CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T2]][0] : !llvm.array<3 x vector<2xf32>>
296//       CHECK: %[[T8:.*]] = llvm.insertvalue %[[T6]], %[[T7]][1] : !llvm.array<3 x vector<2xf32>>
297//       CHECK: %[[T9:.*]] = llvm.insertvalue %[[T6]], %[[T8]][2] : !llvm.array<3 x vector<2xf32>>
298//       CHECK: %[[T10:.*]] = builtin.unrealized_conversion_cast %[[T9]] : !llvm.array<3 x vector<2xf32>> to vector<3x2xf32>
299//       CHECK: return %[[T10]] : vector<3x2xf32>
300
301// -----
302
// Test input: vector.broadcast of vector<2xf32> to vector<3x2xf32>.
// The expected output below starts from a zero constant and inserts the
// argument at positions 0, 1 and 2 of !llvm.array<3 x vector<2xf32>>.
303func.func @broadcast_vec2d_from_vec1d(%arg0: vector<2xf32>) -> vector<3x2xf32> {
304  %0 = vector.broadcast %arg0 : vector<2xf32> to vector<3x2xf32>
305  return %0 : vector<3x2xf32>
306}
307// CHECK-LABEL: @broadcast_vec2d_from_vec1d(
308// CHECK-SAME:  %[[A:.*]]: vector<2xf32>)
309// CHECK:       %[[T0:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32>
310// CHECK:       %[[T1:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
311// CHECK:       %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>>
312// CHECK:       %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm.array<3 x vector<2xf32>>
313// CHECK:       %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm.array<3 x vector<2xf32>>
314// CHECK:       %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : !llvm.array<3 x vector<2xf32>> to vector<3x2xf32>
315// CHECK:       return %[[T5]] : vector<3x2xf32>
316
317// -----
318
// Scalable variant: vector.broadcast of vector<[2]xf32> to vector<3x[2]xf32>.
// The expected output below starts from a zero constant and inserts the
// argument at positions 0, 1 and 2 of !llvm.array<3 x vector<[2]xf32>>.
319func.func @broadcast_vec2d_from_vec1d_scalable(%arg0: vector<[2]xf32>) -> vector<3x[2]xf32> {
320  %0 = vector.broadcast %arg0 : vector<[2]xf32> to vector<3x[2]xf32>
321  return %0 : vector<3x[2]xf32>
322}
323// CHECK-LABEL: @broadcast_vec2d_from_vec1d_scalable(
324// CHECK-SAME:  %[[A:.*]]: vector<[2]xf32>)
325// CHECK:       %[[T0:.*]] = arith.constant dense<0.000000e+00> : vector<3x[2]xf32>
326// CHECK:       %[[T1:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<3x[2]xf32> to !llvm.array<3 x vector<[2]xf32>>
327// CHECK:       %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][0] : !llvm.array<3 x vector<[2]xf32>>
328// CHECK:       %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm.array<3 x vector<[2]xf32>>
329// CHECK:       %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm.array<3 x vector<[2]xf32>>
330// CHECK:       %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : !llvm.array<3 x vector<[2]xf32>> to vector<3x[2]xf32>
331// CHECK:       return %[[T5]] : vector<3x[2]xf32>
332
333// -----
334
// Test input: vector.broadcast of vector<2xindex> to vector<3x2xindex>.
// The expected output below casts the operand to vector<2xi64>, inserts it
// into !llvm.array<3 x vector<2xi64>>, and casts the array back to
// vector<3x2xindex>. Only the first insertvalue is matched explicitly.
335func.func @broadcast_vec2d_from_index_vec1d(%arg0: vector<2xindex>) -> vector<3x2xindex> {
336  %0 = vector.broadcast %arg0 : vector<2xindex> to vector<3x2xindex>
337  return %0 : vector<3x2xindex>
338}
339// CHECK-LABEL: @broadcast_vec2d_from_index_vec1d(
340// CHECK-SAME:  %[[A:.*]]: vector<2xindex>)
341// CHECK:       %[[T1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<2xindex> to vector<2xi64>
342// CHECK:       %[[T0:.*]] = arith.constant dense<0> : vector<3x2xindex>
343// CHECK:       %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<3x2xindex> to !llvm.array<3 x vector<2xi64>>
344// CHECK:       %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][0] : !llvm.array<3 x vector<2xi64>>
345
346// CHECK:       %[[T4:.*]] = builtin.unrealized_conversion_cast %{{.*}} : !llvm.array<3 x vector<2xi64>> to vector<3x2xindex>
347// CHECK:       return %[[T4]] : vector<3x2xindex>
348
349// -----
350
// Scalable variant: vector.broadcast of vector<[2]xindex> to
// vector<3x[2]xindex>. The expected output below casts the operand to
// vector<[2]xi64>, inserts it into !llvm.array<3 x vector<[2]xi64>>, and casts
// the array back. Only the first insertvalue is matched explicitly.
351func.func @broadcast_vec2d_from_index_vec1d_scalable(%arg0: vector<[2]xindex>) -> vector<3x[2]xindex> {
352  %0 = vector.broadcast %arg0 : vector<[2]xindex> to vector<3x[2]xindex>
353  return %0 : vector<3x[2]xindex>
354}
355// CHECK-LABEL: @broadcast_vec2d_from_index_vec1d_scalable(
356// CHECK-SAME:  %[[A:.*]]: vector<[2]xindex>)
357// CHECK:       %[[T1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<[2]xindex> to vector<[2]xi64>
358// CHECK:       %[[T0:.*]] = arith.constant dense<0> : vector<3x[2]xindex>
359// CHECK:       %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<3x[2]xindex> to !llvm.array<3 x vector<[2]xi64>>
360// CHECK:       %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][0] : !llvm.array<3 x vector<[2]xi64>>
361
362// CHECK:       %[[T4:.*]] = builtin.unrealized_conversion_cast %{{.*}} : !llvm.array<3 x vector<[2]xi64>> to vector<3x[2]xindex>
363// CHECK:       return %[[T4]] : vector<3x[2]xindex>
364
365// -----
366
// Test input: vector.broadcast of vector<2xf32> to vector<4x3x2xf32>.
// The expected output below first builds a 3-row array from the argument and
// then inserts that array at positions 0 through 3 of the outer
// !llvm.array<4 x array<3 x vector<2xf32>>>.
367func.func @broadcast_vec3d_from_vec1d(%arg0: vector<2xf32>) -> vector<4x3x2xf32> {
368  %0 = vector.broadcast %arg0 : vector<2xf32> to vector<4x3x2xf32>
369  return %0 : vector<4x3x2xf32>
370}
371// CHECK-LABEL: @broadcast_vec3d_from_vec1d(
372// CHECK-SAME:  %[[A:.*]]: vector<2xf32>)
373// CHECK-DAG:   %[[T0:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32>
374// CHECK-DAG:   %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
375// CHECK-DAG:   %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x2xf32>
376// CHECK-DAG:   %[[T6:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<4x3x2xf32> to !llvm.array<4 x array<3 x vector<2xf32>>>
377
378// CHECK:       %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][0] : !llvm.array<3 x vector<2xf32>>
379// CHECK:       %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][1] : !llvm.array<3 x vector<2xf32>>
380// CHECK:       %[[T5:.*]] = llvm.insertvalue %[[A]], %[[T4]][2] : !llvm.array<3 x vector<2xf32>>
381
382// CHECK:       %[[T7:.*]] = llvm.insertvalue %[[T5]], %[[T6]][0] : !llvm.array<4 x array<3 x vector<2xf32>>>
383// CHECK:       %[[T8:.*]] = llvm.insertvalue %[[T5]], %[[T7]][1] : !llvm.array<4 x array<3 x vector<2xf32>>>
384// CHECK:       %[[T9:.*]] = llvm.insertvalue %[[T5]], %[[T8]][2] : !llvm.array<4 x array<3 x vector<2xf32>>>
385// CHECK:       %[[T10:.*]] = llvm.insertvalue %[[T5]], %[[T9]][3] : !llvm.array<4 x array<3 x vector<2xf32>>>
386
387// CHECK:       %[[T11:.*]] = builtin.unrealized_conversion_cast %[[T10]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32>
388// CHECK:       return %[[T11]] : vector<4x3x2xf32>
389
390// -----
391
// Scalable variant: vector.broadcast of vector<[2]xf32> to
// vector<4x3x[2]xf32>. The expected output below first builds a 3-row array
// from the argument and then inserts that array at positions 0 through 3 of
// the outer !llvm.array<4 x array<3 x vector<[2]xf32>>>.
392func.func @broadcast_vec3d_from_vec1d_scalable(%arg0: vector<[2]xf32>) -> vector<4x3x[2]xf32> {
393  %0 = vector.broadcast %arg0 : vector<[2]xf32> to vector<4x3x[2]xf32>
394  return %0 : vector<4x3x[2]xf32>
395}
396// CHECK-LABEL: @broadcast_vec3d_from_vec1d_scalable(
397// CHECK-SAME:  %[[A:.*]]: vector<[2]xf32>)
398// CHECK-DAG:   %[[T0:.*]] = arith.constant dense<0.000000e+00> : vector<3x[2]xf32>
399// CHECK-DAG:   %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<3x[2]xf32> to !llvm.array<3 x vector<[2]xf32>>
400// CHECK-DAG:   %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x[2]xf32>
401// CHECK-DAG:   %[[T6:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<4x3x[2]xf32> to !llvm.array<4 x array<3 x vector<[2]xf32>>>
402
403// CHECK:       %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][0] : !llvm.array<3 x vector<[2]xf32>>
404// CHECK:       %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][1] : !llvm.array<3 x vector<[2]xf32>>
405// CHECK:       %[[T5:.*]] = llvm.insertvalue %[[A]], %[[T4]][2] : !llvm.array<3 x vector<[2]xf32>>
406
407// CHECK:       %[[T7:.*]] = llvm.insertvalue %[[T5]], %[[T6]][0] : !llvm.array<4 x array<3 x vector<[2]xf32>>>
408// CHECK:       %[[T8:.*]] = llvm.insertvalue %[[T5]], %[[T7]][1] : !llvm.array<4 x array<3 x vector<[2]xf32>>>
409// CHECK:       %[[T9:.*]] = llvm.insertvalue %[[T5]], %[[T8]][2] : !llvm.array<4 x array<3 x vector<[2]xf32>>>
410// CHECK:       %[[T10:.*]] = llvm.insertvalue %[[T5]], %[[T9]][3] : !llvm.array<4 x array<3 x vector<[2]xf32>>>
411
412// CHECK:       %[[T11:.*]] = builtin.unrealized_conversion_cast %[[T10]] : !llvm.array<4 x array<3 x vector<[2]xf32>>> to vector<4x3x[2]xf32>
413// CHECK:       return %[[T11]] : vector<4x3x[2]xf32>
414
415// -----
416
// Test input: vector.broadcast of vector<3x2xf32> to vector<4x3x2xf32>.
// The expected output below casts the operand to
// !llvm.array<3 x vector<2xf32>> and inserts it at positions 0 through 3 of
// the outer array.
417func.func @broadcast_vec3d_from_vec2d(%arg0: vector<3x2xf32>) -> vector<4x3x2xf32> {
418  %0 = vector.broadcast %arg0 : vector<3x2xf32> to vector<4x3x2xf32>
419  return %0 : vector<4x3x2xf32>
420}
421// CHECK-LABEL: @broadcast_vec3d_from_vec2d(
422// CHECK-SAME:  %[[A:.*]]: vector<3x2xf32>)
423// CHECK:       %[[T1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
424// CHECK:       %[[T0:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x2xf32>
425// CHECK:       %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<4x3x2xf32> to !llvm.array<4 x array<3 x vector<2xf32>>>
426// CHECK:       %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][0] : !llvm.array<4 x array<3 x vector<2xf32>>>
427// CHECK:       %[[T5:.*]] = llvm.insertvalue %[[T1]], %[[T3]][1] : !llvm.array<4 x array<3 x vector<2xf32>>>
428// CHECK:       %[[T7:.*]] = llvm.insertvalue %[[T1]], %[[T5]][2] : !llvm.array<4 x array<3 x vector<2xf32>>>
429// CHECK:       %[[T9:.*]] = llvm.insertvalue %[[T1]], %[[T7]][3] : !llvm.array<4 x array<3 x vector<2xf32>>>
430// CHECK:       %[[T10:.*]] = builtin.unrealized_conversion_cast %[[T9]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32>
431// CHECK:       return %[[T10]] : vector<4x3x2xf32>
432
433// -----
434
// Scalable variant: vector.broadcast of vector<3x[2]xf32> to
// vector<4x3x[2]xf32>. The expected output below casts the operand to
// !llvm.array<3 x vector<[2]xf32>> and inserts it at positions 0 through 3 of
// the outer array.
435func.func @broadcast_vec3d_from_vec2d_scalable(%arg0: vector<3x[2]xf32>) -> vector<4x3x[2]xf32> {
436  %0 = vector.broadcast %arg0 : vector<3x[2]xf32> to vector<4x3x[2]xf32>
437  return %0 : vector<4x3x[2]xf32>
438}
439// CHECK-LABEL: @broadcast_vec3d_from_vec2d_scalable(
440// CHECK-SAME:  %[[A:.*]]: vector<3x[2]xf32>)
441// CHECK:       %[[T1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<3x[2]xf32> to !llvm.array<3 x vector<[2]xf32>>
442// CHECK:       %[[T0:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x[2]xf32>
443// CHECK:       %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<4x3x[2]xf32> to !llvm.array<4 x array<3 x vector<[2]xf32>>>
444// CHECK:       %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][0] : !llvm.array<4 x array<3 x vector<[2]xf32>>>
445// CHECK:       %[[T5:.*]] = llvm.insertvalue %[[T1]], %[[T3]][1] : !llvm.array<4 x array<3 x vector<[2]xf32>>>
446// CHECK:       %[[T7:.*]] = llvm.insertvalue %[[T1]], %[[T5]][2] : !llvm.array<4 x array<3 x vector<[2]xf32>>>
447// CHECK:       %[[T9:.*]] = llvm.insertvalue %[[T1]], %[[T7]][3] : !llvm.array<4 x array<3 x vector<[2]xf32>>>
448// CHECK:       %[[T10:.*]] = builtin.unrealized_conversion_cast %[[T9]] : !llvm.array<4 x array<3 x vector<[2]xf32>>> to vector<4x3x[2]xf32>
449// CHECK:       return %[[T10]] : vector<4x3x[2]xf32>
450
451
452// -----
453
// Test input: vector.broadcast that stretches a unit dimension
// (vector<1xf32> -> vector<4xf32>). The expected output below extracts element
// 0 of the operand and splats it with insertelement + shufflevector.
454func.func @broadcast_stretch(%arg0: vector<1xf32>) -> vector<4xf32> {
455  %0 = vector.broadcast %arg0 : vector<1xf32> to vector<4xf32>
456  return %0 : vector<4xf32>
457}
458// CHECK-LABEL: @broadcast_stretch(
459// CHECK-SAME:  %[[A:.*]]: vector<1xf32>)
460// CHECK:       %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64
461// CHECK:       %[[T2:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T1]] : i64] : vector<1xf32>
462// CHECK:       %[[T3:.*]] = llvm.insertelement %[[T2]]
463// CHECK:       %[[T4:.*]] = llvm.shufflevector %[[T3]]
464// CHECK:       return %[[T4]] : vector<4xf32>
465
466// -----
467
// Variant with a scalable result: vector.broadcast of vector<1xf32> to
// vector<[4]xf32>. The expected output below extracts element 0 of the operand
// and splats it with insertelement + shufflevector.
468func.func @broadcast_stretch_scalable(%arg0: vector<1xf32>) -> vector<[4]xf32> {
469  %0 = vector.broadcast %arg0 : vector<1xf32> to vector<[4]xf32>
470  return %0 : vector<[4]xf32>
471}
472// CHECK-LABEL: @broadcast_stretch_scalable(
473// CHECK-SAME:  %[[A:.*]]: vector<1xf32>)
474// CHECK:       %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64
475// CHECK:       %[[T2:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T1]] : i64] : vector<1xf32>
476// CHECK:       %[[T3:.*]] = llvm.insertelement %[[T2]]
477// CHECK:       %[[T4:.*]] = llvm.shufflevector %[[T3]]
478// CHECK:       return %[[T4]] : vector<[4]xf32>
479
480// -----
481
// Test input: vector.broadcast that stretches the leading unit dimension
// (vector<1x4xf32> -> vector<3x4xf32>). The expected output below extracts row
// 0 of the operand and inserts it at positions 0, 1 and 2 of
// !llvm.array<3 x vector<4xf32>>.
482func.func @broadcast_stretch_at_start(%arg0: vector<1x4xf32>) -> vector<3x4xf32> {
483  %0 = vector.broadcast %arg0 : vector<1x4xf32> to vector<3x4xf32>
484  return %0 : vector<3x4xf32>
485}
486// CHECK-LABEL: @broadcast_stretch_at_start(
487// CHECK-SAME:  %[[A:.*]]: vector<1x4xf32>)
488// CHECK:       %[[T2:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<1x4xf32> to !llvm.array<1 x vector<4xf32>>
489// CHECK:       %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<3x4xf32>
490// CHECK:       %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<3x4xf32> to !llvm.array<3 x vector<4xf32>>
491// CHECK:       %[[T3:.*]] = llvm.extractvalue %[[T2]][0] : !llvm.array<1 x vector<4xf32>>
492// CHECK:       %[[T5:.*]] = llvm.insertvalue %[[T3]], %[[T4]][0] : !llvm.array<3 x vector<4xf32>>
493// CHECK:       %[[T6:.*]] = llvm.insertvalue %[[T3]], %[[T5]][1] : !llvm.array<3 x vector<4xf32>>
494// CHECK:       %[[T7:.*]] = llvm.insertvalue %[[T3]], %[[T6]][2] : !llvm.array<3 x vector<4xf32>>
495// CHECK:       %[[T8:.*]] = builtin.unrealized_conversion_cast %[[T7]] : !llvm.array<3 x vector<4xf32>> to vector<3x4xf32>
496// CHECK:       return %[[T8]] : vector<3x4xf32>
497
498// -----
499
// Scalable variant: vector.broadcast of vector<1x[4]xf32> to
// vector<3x[4]xf32>. The expected output below extracts row 0 of the operand
// and inserts it at positions 0, 1 and 2 of !llvm.array<3 x vector<[4]xf32>>.
500func.func @broadcast_stretch_at_start_scalable(%arg0: vector<1x[4]xf32>) -> vector<3x[4]xf32> {
501  %0 = vector.broadcast %arg0 : vector<1x[4]xf32> to vector<3x[4]xf32>
502  return %0 : vector<3x[4]xf32>
503}
504// CHECK-LABEL: @broadcast_stretch_at_start_scalable(
505// CHECK-SAME:  %[[A:.*]]: vector<1x[4]xf32>)
506// CHECK:       %[[T2:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<1x[4]xf32> to !llvm.array<1 x vector<[4]xf32>>
507// CHECK:       %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<3x[4]xf32>
508// CHECK:       %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<3x[4]xf32> to !llvm.array<3 x vector<[4]xf32>>
509// CHECK:       %[[T3:.*]] = llvm.extractvalue %[[T2]][0] : !llvm.array<1 x vector<[4]xf32>>
510// CHECK:       %[[T5:.*]] = llvm.insertvalue %[[T3]], %[[T4]][0] : !llvm.array<3 x vector<[4]xf32>>
511// CHECK:       %[[T6:.*]] = llvm.insertvalue %[[T3]], %[[T5]][1] : !llvm.array<3 x vector<[4]xf32>>
512// CHECK:       %[[T7:.*]] = llvm.insertvalue %[[T3]], %[[T6]][2] : !llvm.array<3 x vector<[4]xf32>>
513// CHECK:       %[[T8:.*]] = builtin.unrealized_conversion_cast %[[T7]] : !llvm.array<3 x vector<[4]xf32>> to vector<3x[4]xf32>
514// CHECK:       return %[[T8]] : vector<3x[4]xf32>
515
516// -----
517
// Test input: vector.broadcast that stretches the trailing unit dimension
// (vector<4x1xf32> -> vector<4x3xf32>). The expected output below repeats the
// same sequence for each of the four rows: extractvalue, extractelement at
// index 0, splat via insertelement + shufflevector, then insertvalue.
518func.func @broadcast_stretch_at_end(%arg0: vector<4x1xf32>) -> vector<4x3xf32> {
519  %0 = vector.broadcast %arg0 : vector<4x1xf32> to vector<4x3xf32>
520  return %0 : vector<4x3xf32>
521}
522// CHECK-LABEL: @broadcast_stretch_at_end(
523// CHECK-SAME:  %[[A:.*]]: vector<4x1xf32>)
524// CHECK:       %[[T2:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<4x1xf32> to !llvm.array<4 x vector<1xf32>>
525// CHECK:       %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<4x3xf32>
526// CHECK:       %[[T7:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<4x3xf32> to !llvm.array<4 x vector<3xf32>>
527// CHECK:       %[[T3:.*]] = llvm.extractvalue %[[T2]][0] : !llvm.array<4 x vector<1xf32>>
528// CHECK:       %[[T4:.*]] = llvm.mlir.constant(0 : i64) : i64
529// CHECK:       %[[T5:.*]] = llvm.extractelement %[[T3]]{{\[}}%[[T4]] : i64] : vector<1xf32>
530// CHECK:       %[[T6Insert:.*]] = llvm.insertelement %[[T5]]
531// CHECK:       %[[T6:.*]] = llvm.shufflevector %[[T6Insert]]
532// CHECK:       %[[T8:.*]] = llvm.insertvalue %[[T6]], %[[T7]][0] : !llvm.array<4 x vector<3xf32>>
533// CHECK:       %[[T10:.*]] = llvm.extractvalue %[[T2]][1] : !llvm.array<4 x vector<1xf32>>
534// CHECK:       %[[T11:.*]] = llvm.mlir.constant(0 : i64) : i64
535// CHECK:       %[[T12:.*]] = llvm.extractelement %[[T10]]{{\[}}%[[T11]] : i64] : vector<1xf32>
536// CHECK:       %[[T13Insert:.*]] = llvm.insertelement %[[T12]]
537// CHECK:       %[[T13:.*]] = llvm.shufflevector %[[T13Insert]]
538// CHECK:       %[[T14:.*]] = llvm.insertvalue %[[T13]], %[[T8]][1] : !llvm.array<4 x vector<3xf32>>
539// CHECK:       %[[T16:.*]] = llvm.extractvalue %[[T2]][2] : !llvm.array<4 x vector<1xf32>>
540// CHECK:       %[[T17:.*]] = llvm.mlir.constant(0 : i64) : i64
541// CHECK:       %[[T18:.*]] = llvm.extractelement %[[T16]]{{\[}}%[[T17]] : i64] : vector<1xf32>
542// CHECK:       %[[T19Insert:.*]] = llvm.insertelement %[[T18]]
543// CHECK:       %[[T19:.*]] = llvm.shufflevector %[[T19Insert]]
544// CHECK:       %[[T20:.*]] = llvm.insertvalue %[[T19]], %[[T14]][2] : !llvm.array<4 x vector<3xf32>>
545// CHECK:       %[[T22:.*]] = llvm.extractvalue %[[T2]][3] : !llvm.array<4 x vector<1xf32>>
546// CHECK:       %[[T23:.*]] = llvm.mlir.constant(0 : i64) : i64
547// CHECK:       %[[T24:.*]] = llvm.extractelement %[[T22]]{{\[}}%[[T23]] : i64] : vector<1xf32>
548// CHECK:       %[[T25Insert:.*]] = llvm.insertelement %[[T24]]
549// CHECK:       %[[T25:.*]] = llvm.shufflevector %[[T25Insert]]
550// CHECK:       %[[T26:.*]] = llvm.insertvalue %[[T25]], %[[T20]][3] : !llvm.array<4 x vector<3xf32>>
551// CHECK:       %[[T27:.*]] = builtin.unrealized_conversion_cast %[[T26]] : !llvm.array<4 x vector<3xf32>> to vector<4x3xf32>
552// CHECK:       return %[[T27]] : vector<4x3xf32>
553
554// TODO: Add support for scalable vectors. Until then, the test below only verifies that the vector.broadcast op is left unconverted.
555
// Test input: trailing-unit-dim stretch with a scalable leading dimension
// (vector<[4]x1xf32> -> vector<[4]x3xf32>). The expected output below still
// contains the original vector.broadcast op, i.e. this case is not lowered.
556func.func @broadcast_stretch_at_end_scalable(%arg0: vector<[4]x1xf32>) -> vector<[4]x3xf32> {
557  %0 = vector.broadcast %arg0 : vector<[4]x1xf32> to vector<[4]x3xf32>
558  return %0 : vector<[4]x3xf32>
559}
560// CHECK-LABEL: @broadcast_stretch_at_end_scalable
561// CHECK-SAME:  %[[A:.*]]: vector<[4]x1xf32>)
562// CHECK: vector.broadcast %[[A]] : vector<[4]x1xf32> to vector<[4]x3xf32>
563
564// -----
565
// Test input: vector.broadcast that stretches the middle unit dimension
// (vector<4x1x2xf32> -> vector<4x3x2xf32>).
566func.func @broadcast_stretch_in_middle(%arg0: vector<4x1x2xf32>) -> vector<4x3x2xf32> {
567  %0 = vector.broadcast %arg0 : vector<4x1x2xf32> to vector<4x3x2xf32>
568  return %0 : vector<4x3x2xf32>
569}
570// CHECK-LABEL: @broadcast_stretch_in_middle(
571// CHECK-SAME:  %[[A:.*]]: vector<4x1x2xf32>) -> vector<4x3x2xf32> {
572// CHECK:       %[[T3:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<4x1x2xf32> to !llvm.array<4 x array<1 x vector<2xf32>>>
573// CHECK:       %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x2xf32>
574// CHECK:       %[[T9:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<4x3x2xf32> to !llvm.array<4 x array<3 x vector<2xf32>>>
575// CHECK:       %[[T2:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32>
576// CHECK:       %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T2]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
577// CHECK:       %[[T4:.*]] = llvm.extractvalue %[[T3]][0, 0] : !llvm.array<4 x array<1 x vector<2xf32>>>
578// CHECK:       %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0] : !llvm.array<3 x vector<2xf32>>
579// CHECK:       %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][1] : !llvm.array<3 x vector<2xf32>>
580// CHECK:       %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][2] : !llvm.array<3 x vector<2xf32>>
581// CHECK:       %[[T10:.*]] = llvm.insertvalue %[[T8]], %[[T9]][0] : !llvm.array<4 x array<3 x vector<2xf32>>>
582// CHECK:       %[[T12:.*]] = llvm.extractvalue %[[T3]][1, 0] : !llvm.array<4 x array<1 x vector<2xf32>>>
583// CHECK:       %[[T14:.*]] = llvm.insertvalue %[[T12]], %[[T5]][0] : !llvm.array<3 x vector<2xf32>>
584// CHECK:       %[[T15:.*]] = llvm.insertvalue %[[T12]], %[[T14]][1] : !llvm.array<3 x vector<2xf32>>
585// CHECK:       %[[T16:.*]] = llvm.insertvalue %[[T12]], %[[T15]][2] : !llvm.array<3 x vector<2xf32>>
586// CHECK:       %[[T17:.*]] = llvm.insertvalue %[[T16]], %[[T10]][1] : !llvm.array<4 x array<3 x vector<2xf32>>>
587// CHECK:       %[[T19:.*]] = llvm.extractvalue %[[T3]][2, 0] : !llvm.array<4 x array<1 x vector<2xf32>>>
588// CHECK:       %[[T21:.*]] = llvm.insertvalue %[[T19]], %[[T5]][0] : !llvm.array<3 x vector<2xf32>>
589// CHECK:       %[[T22:.*]] = llvm.insertvalue %[[T19]], %[[T21]][1] : !llvm.array<3 x vector<2xf32>>
590// CHECK:       %[[T23:.*]] = llvm.insertvalue %[[T19]], %[[T22]][2] : !llvm.array<3 x vector<2xf32>>
591// CHECK:       %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T17]][2] : !llvm.array<4 x array<3 x vector<2xf32>>>
592// CHECK:       %[[T26:.*]] = llvm.extractvalue %[[T3]][3, 0] : !llvm.array<4 x array<1 x vector<2xf32>>>
593// CHECK:       %[[T28:.*]] = llvm.insertvalue %[[T26]], %[[T5]][0] : !llvm.array<3 x vector<2xf32>>
594// CHECK:       %[[T29:.*]] = llvm.insertvalue %[[T26]], %[[T28]][1] : !llvm.array<3 x vector<2xf32>>
595// CHECK:       %[[T30:.*]] = llvm.insertvalue %[[T26]], %[[T29]][2] : !llvm.array<3 x vector<2xf32>>
596// CHECK:       %[[T31:.*]] = llvm.insertvalue %[[T30]], %[[T24]][3] : !llvm.array<4 x array<3 x vector<2xf32>>>
597// CHECK:       %[[T32:.*]] = builtin.unrealized_conversion_cast %[[T31]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32>
598// CHECK:       return %[[T32]] : vector<4x3x2xf32>
599
600// -----
601
// Same middle-dimension stretch as the fixed-size variant, but the innermost
// dimension is scalable ([2]). The expectations that follow require the same
// extractvalue/insertvalue sequence, operating on vector<[2]xf32> slices.
602func.func @broadcast_stretch_in_middle_scalable_v1(%arg0: vector<4x1x[2]xf32>) -> vector<4x3x[2]xf32> {
603  %0 = vector.broadcast %arg0 : vector<4x1x[2]xf32> to vector<4x3x[2]xf32>
604  return %0 : vector<4x3x[2]xf32>
605}
606// CHECK-LABEL: @broadcast_stretch_in_middle_scalable_v1(
607// CHECK-SAME:  %[[A:.*]]: vector<4x1x[2]xf32>) -> vector<4x3x[2]xf32> {
608// CHECK:       %[[T3:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<4x1x[2]xf32> to !llvm.array<4 x array<1 x vector<[2]xf32>>>
609// CHECK:       %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x[2]xf32>
610// CHECK:       %[[T9:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<4x3x[2]xf32> to !llvm.array<4 x array<3 x vector<[2]xf32>>>
611// CHECK:       %[[T2:.*]] = arith.constant dense<0.000000e+00> : vector<3x[2]xf32>
612// CHECK:       %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T2]] : vector<3x[2]xf32> to !llvm.array<3 x vector<[2]xf32>>
613// CHECK:       %[[T4:.*]] = llvm.extractvalue %[[T3]][0, 0] : !llvm.array<4 x array<1 x vector<[2]xf32>>>
614// CHECK:       %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0] : !llvm.array<3 x vector<[2]xf32>>
615// CHECK:       %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][1] : !llvm.array<3 x vector<[2]xf32>>
616// CHECK:       %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][2] : !llvm.array<3 x vector<[2]xf32>>
617// CHECK:       %[[T10:.*]] = llvm.insertvalue %[[T8]], %[[T9]][0] : !llvm.array<4 x array<3 x vector<[2]xf32>>>
618// CHECK:       %[[T12:.*]] = llvm.extractvalue %[[T3]][1, 0] : !llvm.array<4 x array<1 x vector<[2]xf32>>>
619// CHECK:       %[[T14:.*]] = llvm.insertvalue %[[T12]], %[[T5]][0] : !llvm.array<3 x vector<[2]xf32>>
620// CHECK:       %[[T15:.*]] = llvm.insertvalue %[[T12]], %[[T14]][1] : !llvm.array<3 x vector<[2]xf32>>
621// CHECK:       %[[T16:.*]] = llvm.insertvalue %[[T12]], %[[T15]][2] : !llvm.array<3 x vector<[2]xf32>>
622// CHECK:       %[[T17:.*]] = llvm.insertvalue %[[T16]], %[[T10]][1] : !llvm.array<4 x array<3 x vector<[2]xf32>>>
623// CHECK:       %[[T19:.*]] = llvm.extractvalue %[[T3]][2, 0] : !llvm.array<4 x array<1 x vector<[2]xf32>>>
624// CHECK:       %[[T21:.*]] = llvm.insertvalue %[[T19]], %[[T5]][0] : !llvm.array<3 x vector<[2]xf32>>
625// CHECK:       %[[T22:.*]] = llvm.insertvalue %[[T19]], %[[T21]][1] : !llvm.array<3 x vector<[2]xf32>>
626// CHECK:       %[[T23:.*]] = llvm.insertvalue %[[T19]], %[[T22]][2] : !llvm.array<3 x vector<[2]xf32>>
627// CHECK:       %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T17]][2] : !llvm.array<4 x array<3 x vector<[2]xf32>>>
628// CHECK:       %[[T26:.*]] = llvm.extractvalue %[[T3]][3, 0] : !llvm.array<4 x array<1 x vector<[2]xf32>>>
629// CHECK:       %[[T28:.*]] = llvm.insertvalue %[[T26]], %[[T5]][0] : !llvm.array<3 x vector<[2]xf32>>
630// CHECK:       %[[T29:.*]] = llvm.insertvalue %[[T26]], %[[T28]][1] : !llvm.array<3 x vector<[2]xf32>>
631// CHECK:       %[[T30:.*]] = llvm.insertvalue %[[T26]], %[[T29]][2] : !llvm.array<3 x vector<[2]xf32>>
632// CHECK:       %[[T31:.*]] = llvm.insertvalue %[[T30]], %[[T24]][3] : !llvm.array<4 x array<3 x vector<[2]xf32>>>
633// CHECK:       %[[T32:.*]] = builtin.unrealized_conversion_cast %[[T31]] : !llvm.array<4 x array<3 x vector<[2]xf32>>> to vector<4x3x[2]xf32>
634// CHECK:       return %[[T32]] : vector<4x3x[2]xf32>
635
636// -----
637
638// TODO: Add lowering support for broadcasts whose leading dimension is scalable; for now the op below is expected to be left unconverted.
639
// Middle-dimension stretch where the leading dimension is scalable ([4]).
// The expectations that follow require the vector.broadcast op to still be
// present in the output.
640func.func @broadcast_stretch_in_middle_scalable_v2(%arg0: vector<[4]x1x2xf32>) -> vector<[4]x3x2xf32> {
641  %0 = vector.broadcast %arg0 : vector<[4]x1x2xf32> to vector<[4]x3x2xf32>
642  return %0 : vector<[4]x3x2xf32>
643}
644// CHECK-LABEL: @broadcast_stretch_in_middle_scalable_v2(
645// CHECK-SAME:  %[[A:.*]]: vector<[4]x1x2xf32>) -> vector<[4]x3x2xf32> {
646// CHECK:  vector.broadcast %[[A]] : vector<[4]x1x2xf32> to vector<[4]x3x2xf32>
647
648// -----
649
650//===----------------------------------------------------------------------===//
651// vector.outerproduct
652//===----------------------------------------------------------------------===//
653
// Outer product of vector<2xf32> and vector<3xf32> without an accumulator.
// The expectations that follow require, for each of the two elements of
// %arg0: an llvm.extractelement, a splat built from llvm.insertelement +
// llvm.shufflevector, an arith.mulf with %arg1, and an llvm.insertvalue of
// the row into the result array.
654func.func @outerproduct(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<2x3xf32> {
655  %2 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<3xf32>
656  return %2 : vector<2x3xf32>
657}
658// CHECK-LABEL: @outerproduct(
659// CHECK-SAME:  %[[A:.*]]: vector<2xf32>,
660// CHECK-SAME:  %[[B:.*]]: vector<3xf32>)
661// CHECK:       %[[T2:.*]] = arith.constant dense<0.000000e+00> : vector<2x3xf32>
662// CHECK:       %[[T7:.*]] = builtin.unrealized_conversion_cast %[[T2]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>>
663// CHECK:       %[[T3:.*]] = llvm.mlir.constant(0 : i64) : i64
664// CHECK:       %[[T4:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T3]] : i64] : vector<2xf32>
665// CHECK:       %[[T5Insert:.*]] = llvm.insertelement %[[T4]]
666// CHECK:       %[[T5:.*]] = llvm.shufflevector %[[T5Insert]]
667// CHECK:       %[[T6:.*]] = arith.mulf %[[T5]], %[[B]] : vector<3xf32>
668// CHECK:       %[[T8:.*]] = llvm.insertvalue %[[T6]], %[[T7]][0] : !llvm.array<2 x vector<3xf32>>
669// CHECK:       %[[T9:.*]] = llvm.mlir.constant(1 : i64) : i64
670// CHECK:       %[[T10:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T9]] : i64] : vector<2xf32>
671// CHECK:       %[[T11Insert:.*]] = llvm.insertelement %[[T10]]
672// CHECK:       %[[T11:.*]] = llvm.shufflevector %[[T11Insert]]
673// CHECK:       %[[T12:.*]] = arith.mulf %[[T11]], %[[B]] : vector<3xf32>
674// CHECK:       %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T8]][1] : !llvm.array<2 x vector<3xf32>>
675// CHECK:       %[[T14:.*]] = builtin.unrealized_conversion_cast %[[T13]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32>
676// CHECK:       return %[[T14]] : vector<2x3xf32>
677
678// -----
679
// Outer product of a fixed vector<2xf32> with a scalable vector<[3]xf32>,
// without an accumulator. The expectations that follow mirror the fixed-size
// test, with each row computed as an arith.mulf on vector<[3]xf32>.
680func.func @outerproduct_scalable(%arg0: vector<2xf32>, %arg1: vector<[3]xf32>) -> vector<2x[3]xf32> {
681  %2 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<[3]xf32>
682  return %2 : vector<2x[3]xf32>
683}
684// CHECK-LABEL: @outerproduct_scalable
685// CHECK-SAME:  %[[A:.*]]: vector<2xf32>,
686// CHECK-SAME:  %[[B:.*]]: vector<[3]xf32>)
687// CHECK:       %[[T2:.*]] = arith.constant dense<0.000000e+00> : vector<2x[3]xf32>
688// CHECK:       %[[T7:.*]] = builtin.unrealized_conversion_cast %[[T2]] : vector<2x[3]xf32> to !llvm.array<2 x vector<[3]xf32>>
689// CHECK:       %[[T3:.*]] = llvm.mlir.constant(0 : i64) : i64
690// CHECK:       %[[T4:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T3]] : i64] : vector<2xf32>
691// CHECK:       %[[T5Insert:.*]] = llvm.insertelement %[[T4]]
692// CHECK:       %[[T5:.*]] = llvm.shufflevector %[[T5Insert]]
693// CHECK:       %[[T6:.*]] = arith.mulf %[[T5]], %[[B]] : vector<[3]xf32>
694// CHECK:       %[[T8:.*]] = llvm.insertvalue %[[T6]], %[[T7]][0] : !llvm.array<2 x vector<[3]xf32>>
695// CHECK:       %[[T9:.*]] = llvm.mlir.constant(1 : i64) : i64
696// CHECK:       %[[T10:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T9]] : i64] : vector<2xf32>
697// CHECK:       %[[T11Insert:.*]] = llvm.insertelement %[[T10]]
698// CHECK:       %[[T11:.*]] = llvm.shufflevector %[[T11Insert]]
699// CHECK:       %[[T12:.*]] = arith.mulf %[[T11]], %[[B]] : vector<[3]xf32>
700// CHECK:       %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T8]][1] : !llvm.array<2 x vector<[3]xf32>>
701// CHECK:       %[[T14:.*]] = builtin.unrealized_conversion_cast %[[T13]] : !llvm.array<2 x vector<[3]xf32>> to vector<2x[3]xf32>
702// CHECK:       return %[[T14]] : vector<2x[3]xf32>
703
704// -----
705
// Outer product on index-typed vectors. The expectations that follow require
// %arg0 to be cast to vector<2xi64> through builtin.unrealized_conversion_cast,
// the row product to be an arith.muli on vector<3xindex>, and the result to
// be cast back to vector<3xi64> before the llvm.insertvalue. Only the first
// row (index 0) is matched.
706func.func @outerproduct_index(%arg0: vector<2xindex>, %arg1: vector<3xindex>) -> vector<2x3xindex> {
707  %2 = vector.outerproduct %arg0, %arg1 : vector<2xindex>, vector<3xindex>
708  return %2 : vector<2x3xindex>
709}
710// CHECK-LABEL: @outerproduct_index(
711// CHECK-SAME:  %[[A:.*]]: vector<2xindex>,
712// CHECK-SAME:  %[[B:.*]]: vector<3xindex>)
713// CHECK:       %[[T1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<2xindex> to vector<2xi64>
714// CHECK:       %[[T0:.*]] = arith.constant dense<0> : vector<2x3xindex>
715// CHECK:       %[[T8:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<2x3xindex> to !llvm.array<2 x vector<3xi64>>
716// CHECK:       %[[T2:.*]] = llvm.mlir.constant(0 : i64) : i64
717// CHECK:       %[[T3:.*]] = llvm.extractelement %[[T1]]{{\[}}%[[T2]] : i64] : vector<2xi64>
718// CHECK:       %[[T4:.*]] = llvm.insertelement %[[T3]]
719// CHECK:       %[[T5:.*]] = llvm.shufflevector %[[T4]]
720// CHECK:       %[[T5Cast:.*]] = builtin.unrealized_conversion_cast %[[T5]] : vector<3xi64> to vector<3xindex>
721// CHECK:       %[[T6:.*]] = arith.muli %[[T5Cast]], %[[B]] : vector<3xindex>
722// CHECK:       %[[T7:.*]] = builtin.unrealized_conversion_cast %[[T6]] : vector<3xindex> to vector<3xi64>
723// CHECK:       %{{.*}} = llvm.insertvalue %[[T7]], %[[T8]][0] : !llvm.array<2 x vector<3xi64>>
724
725// -----
726
// Index-typed outer product with a scalable right-hand side
// (vector<[3]xindex>). The expectations that follow mirror the fixed-size
// index test, using vector<[3]xi64> / vector<[3]xindex> for the row, and
// likewise match only the first row (index 0).
727func.func @outerproduct_index_scalable(%arg0: vector<2xindex>, %arg1: vector<[3]xindex>) -> vector<2x[3]xindex> {
728  %2 = vector.outerproduct %arg0, %arg1 : vector<2xindex>, vector<[3]xindex>
729  return %2 : vector<2x[3]xindex>
730}
731// CHECK-LABEL: @outerproduct_index_scalable
732// CHECK-SAME:  %[[A:.*]]: vector<2xindex>,
733// CHECK-SAME:  %[[B:.*]]: vector<[3]xindex>)
734// CHECK:       %[[T1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<2xindex> to vector<2xi64>
735// CHECK:       %[[T0:.*]] = arith.constant dense<0> : vector<2x[3]xindex>
736// CHECK:       %[[T8:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<2x[3]xindex> to !llvm.array<2 x vector<[3]xi64>>
737// CHECK:       %[[T2:.*]] = llvm.mlir.constant(0 : i64) : i64
738// CHECK:       %[[T3:.*]] = llvm.extractelement %[[T1]]{{\[}}%[[T2]] : i64] : vector<2xi64>
739// CHECK:       %[[T4:.*]] = llvm.insertelement %[[T3]]
740// CHECK:       %[[T5:.*]] = llvm.shufflevector %[[T4]]
741// CHECK:       %[[T5Cast:.*]] = builtin.unrealized_conversion_cast %[[T5]] : vector<[3]xi64> to vector<[3]xindex>
742// CHECK:       %[[T6:.*]] = arith.muli %[[T5Cast]], %[[B]] : vector<[3]xindex>
743// CHECK:       %[[T7:.*]] = builtin.unrealized_conversion_cast %[[T6]] : vector<[3]xindex> to vector<[3]xi64>
744// CHECK:       %{{.*}} = llvm.insertvalue %[[T7]], %[[T8]][0] : !llvm.array<2 x vector<[3]xi64>>
745
746// -----
747
// Outer product of vector<2xf32> and vector<3xf32> with an accumulator
// (%arg2 : vector<2x3xf32>). The expectations that follow require each row to
// be computed by llvm.intr.fmuladd from the splatted element of %arg0, %arg1,
// and the matching row extracted from the accumulator.
748func.func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: vector<2x3xf32>) -> vector<2x3xf32> {
749  %2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<2xf32>, vector<3xf32>
750  return %2 : vector<2x3xf32>
751}
752// CHECK-LABEL: @outerproduct_add(
753// CHECK-SAME:  %[[A:.*]]: vector<2xf32>,
754// CHECK-SAME:  %[[B:.*]]: vector<3xf32>,
755// CHECK-SAME:  %[[C:.*]]: vector<2x3xf32>) -> vector<2x3xf32>
756// CHECK:       %[[T7:.*]] = builtin.unrealized_conversion_cast %[[C]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>>
757// CHECK:       %[[T3:.*]] = arith.constant dense<0.000000e+00> : vector<2x3xf32>
758// CHECK:       %[[T10:.*]] = builtin.unrealized_conversion_cast %[[T3]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>>
759// CHECK:       %[[T4:.*]] = llvm.mlir.constant(0 : i64) : i64
760// CHECK:       %[[T5:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T4]] : i64] : vector<2xf32>
761// CHECK:       %[[T6Insert:.*]] = llvm.insertelement %[[T5]]
762// CHECK:       %[[T6:.*]] = llvm.shufflevector %[[T6Insert]]
763// CHECK:       %[[T8:.*]] = llvm.extractvalue %[[T7]][0] : !llvm.array<2 x vector<3xf32>>
764// CHECK:       %[[T9:.*]] = llvm.intr.fmuladd(%[[T6]], %[[B]], %[[T8]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32>
765// CHECK:       %[[T11:.*]] = llvm.insertvalue %[[T9]], %[[T10]][0] : !llvm.array<2 x vector<3xf32>>
766// CHECK:       %[[T12:.*]] = llvm.mlir.constant(1 : i64) : i64
767// CHECK:       %[[T13:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T12]] : i64] : vector<2xf32>
768// CHECK:       %[[T14Insert:.*]] = llvm.insertelement %[[T13]]
769// CHECK:       %[[T14:.*]] = llvm.shufflevector %[[T14Insert]]
770// CHECK:       %[[T16:.*]] = llvm.extractvalue %[[T7]][1] : !llvm.array<2 x vector<3xf32>>
771// CHECK:       %[[T17:.*]] = llvm.intr.fmuladd(%[[T14]], %[[B]], %[[T16]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32>
772// CHECK:       %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T11]][1] : !llvm.array<2 x vector<3xf32>>
773// CHECK:       %[[T19:.*]] = builtin.unrealized_conversion_cast %[[T18]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32>
774// CHECK:       return %[[T19]] : vector<2x3xf32>
775
776// -----
777
// Accumulating outer product where the right-hand side and the accumulator
// rows are scalable (vector<[3]xf32>). The expectations that follow mirror
// the fixed-size accumulating test, with llvm.intr.fmuladd on vector<[3]xf32>.
778func.func @outerproduct_add_scalable(%arg0: vector<2xf32>, %arg1: vector<[3]xf32>, %arg2: vector<2x[3]xf32>) -> vector<2x[3]xf32> {
779  %2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<2xf32>, vector<[3]xf32>
780  return %2 : vector<2x[3]xf32>
781}
782// CHECK-LABEL: @outerproduct_add_scalable
783// CHECK-SAME:  %[[A:.*]]: vector<2xf32>,
784// CHECK-SAME:  %[[B:.*]]: vector<[3]xf32>,
785// CHECK-SAME:  %[[C:.*]]: vector<2x[3]xf32>) -> vector<2x[3]xf32>
786// CHECK:       %[[T7:.*]] = builtin.unrealized_conversion_cast %[[C]] : vector<2x[3]xf32> to !llvm.array<2 x vector<[3]xf32>>
787// CHECK:       %[[T3:.*]] = arith.constant dense<0.000000e+00> : vector<2x[3]xf32>
788// CHECK:       %[[T10:.*]] = builtin.unrealized_conversion_cast %[[T3]] : vector<2x[3]xf32> to !llvm.array<2 x vector<[3]xf32>>
789// CHECK:       %[[T4:.*]] = llvm.mlir.constant(0 : i64) : i64
790// CHECK:       %[[T5:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T4]] : i64] : vector<2xf32>
791// CHECK:       %[[T6Insert:.*]] = llvm.insertelement %[[T5]]
792// CHECK:       %[[T6:.*]] = llvm.shufflevector %[[T6Insert]]
793// CHECK:       %[[T8:.*]] = llvm.extractvalue %[[T7]][0] : !llvm.array<2 x vector<[3]xf32>>
794// CHECK:       %[[T9:.*]] = llvm.intr.fmuladd(%[[T6]], %[[B]], %[[T8]]) : (vector<[3]xf32>, vector<[3]xf32>, vector<[3]xf32>) -> vector<[3]xf32>
795// CHECK:       %[[T11:.*]] = llvm.insertvalue %[[T9]], %[[T10]][0] : !llvm.array<2 x vector<[3]xf32>>
796// CHECK:       %[[T12:.*]] = llvm.mlir.constant(1 : i64) : i64
797// CHECK:       %[[T13:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T12]] : i64] : vector<2xf32>
798// CHECK:       %[[T14Insert:.*]] = llvm.insertelement %[[T13]]
799// CHECK:       %[[T14:.*]] = llvm.shufflevector %[[T14Insert]]
800// CHECK:       %[[T16:.*]] = llvm.extractvalue %[[T7]][1] : !llvm.array<2 x vector<[3]xf32>>
801// CHECK:       %[[T17:.*]] = llvm.intr.fmuladd(%[[T14]], %[[B]], %[[T16]]) : (vector<[3]xf32>, vector<[3]xf32>, vector<[3]xf32>) -> vector<[3]xf32>
802// CHECK:       %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T11]][1] : !llvm.array<2 x vector<[3]xf32>>
803// CHECK:       %[[T19:.*]] = builtin.unrealized_conversion_cast %[[T18]] : !llvm.array<2 x vector<[3]xf32>> to vector<2x[3]xf32>
804// CHECK:       return %[[T19]] : vector<2x[3]xf32>
805
806// -----
807
808//===----------------------------------------------------------------------===//
809// vector.mask { vector.outerproduct }
810//===----------------------------------------------------------------------===//
811
// Masked vector-by-scalar outerproduct on f32 with kind = add and accumulator
// %arg2. The expectations that follow require an llvm.intr.fmuladd taking
// %arg0 and %arg2, followed by an arith.select on the mask %m that chooses
// between the fmuladd result and %arg2.
812func.func @masked_float_add_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: vector<2xf32>, %m: vector<2xi1>) -> vector<2xf32> {
813  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<add>} : vector<2xf32>, f32 } : vector<2xi1> -> vector<2xf32>
814  return %0 : vector<2xf32>
815}
816
817// CHECK-LABEL:   func.func @masked_float_add_outerprod(
818// CHECK-SAME:                                          %[[VAL_0:.*]]: vector<2xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<2xf32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xf32> {
819// CHECK:           %[[VAL_8:.*]] = llvm.intr.fmuladd(%[[VAL_0]], %{{.*}}, %[[VAL_2]])  : (vector<2xf32>, vector<2xf32>, vector<2xf32>) -> vector<2xf32>
820// CHECK:           %[[VAL_9:.*]] = arith.select %[[VAL_3]], %[[VAL_8]], %[[VAL_2]] : vector<2xi1>, vector<2xf32>
821
822// -----
823
// Scalable-vector (vector<[2]xf32>) variant of the masked f32 outerproduct
// with kind = add. The expectations that follow require llvm.intr.fmuladd
// followed by an arith.select on the mask %m between that result and %arg2.
824func.func @masked_float_add_outerprod_scalable(%arg0: vector<[2]xf32>, %arg1: f32, %arg2: vector<[2]xf32>, %m: vector<[2]xi1>) -> vector<[2]xf32> {
825  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<add>} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32>
826  return %0 : vector<[2]xf32>
827}
828
829// CHECK-LABEL:   func.func @masked_float_add_outerprod_scalable(
830// CHECK-SAME:                                                   %[[VAL_0:.*]]: vector<[2]xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<[2]xf32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xf32> {
831// CHECK:           %[[VAL_8:.*]] = llvm.intr.fmuladd(%[[VAL_0]], %{{.*}}, %[[VAL_2]])  : (vector<[2]xf32>, vector<[2]xf32>, vector<[2]xf32>) -> vector<[2]xf32>
832// CHECK:           %[[VAL_9:.*]] = arith.select %[[VAL_3]], %[[VAL_8]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xf32>
833
834// -----
835
// Masked vector-by-scalar outerproduct on f32 with kind = mul and accumulator
// %arg2. The expectations that follow require an arith.mulf on %arg0, a
// second arith.mulf combining that product with %arg2, and an arith.select on
// the mask %m between the combined value and %arg2.
836func.func @masked_float_mul_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: vector<2xf32>, %m: vector<2xi1>) -> vector<2xf32> {
837  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<mul>} : vector<2xf32>, f32 } : vector<2xi1> -> vector<2xf32>
838  return %0 : vector<2xf32>
839}
840
841// CHECK-LABEL:   func.func @masked_float_mul_outerprod(
842// CHECK-SAME:                                          %[[VAL_0:.*]]: vector<2xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<2xf32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xf32> {
843// CHECK:           %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<2xf32>
844// CHECK:           %[[VAL_9:.*]] = arith.mulf %[[VAL_8]], %[[VAL_2]] : vector<2xf32>
845// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xf32>
846
847// -----
848
// Scalable-vector (vector<[2]xf32>) variant of the masked f32 outerproduct
// with kind = mul. The expectations that follow require two arith.mulf ops
// and an arith.select on the mask %m between the combined value and %arg2.
849func.func @masked_float_mul_outerprod_scalable(%arg0: vector<[2]xf32>, %arg1: f32, %arg2: vector<[2]xf32>, %m: vector<[2]xi1>) -> vector<[2]xf32> {
850  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<mul>} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32>
851  return %0 : vector<[2]xf32>
852}
853
854// CHECK-LABEL:   func.func @masked_float_mul_outerprod_scalable(
855// CHECK-SAME:                                                   %[[VAL_0:.*]]: vector<[2]xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<[2]xf32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xf32> {
856// CHECK:           %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<[2]xf32>
857// CHECK:           %[[VAL_9:.*]] = arith.mulf %[[VAL_8]], %[[VAL_2]] : vector<[2]xf32>
858// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xf32>
859
860// -----
861
// Masked vector-by-scalar outerproduct on f32 with kind = maxnumf and
// accumulator %arg2. The expectations that follow require an arith.mulf on
// %arg0, an arith.maxnumf of that product with %arg2, and an arith.select on
// the mask %m between the combined value and %arg2.
862func.func @masked_float_max_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: vector<2xf32>, %m: vector<2xi1>) -> vector<2xf32> {
863  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<maxnumf>} : vector<2xf32>, f32 } : vector<2xi1> -> vector<2xf32>
864  return %0 : vector<2xf32>
865}
866
867// CHECK-LABEL:   func.func @masked_float_max_outerprod(
868// CHECK-SAME:                                          %[[VAL_0:.*]]: vector<2xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<2xf32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xf32> {
869// CHECK:           %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<2xf32>
870// CHECK:           %[[VAL_9:.*]] = arith.maxnumf %[[VAL_8]], %[[VAL_2]] : vector<2xf32>
871// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xf32>
872
873// -----
874
// Scalable-vector (vector<[2]xf32>) variant of the masked f32 outerproduct
// with kind = maxnumf. The expectations that follow require arith.mulf,
// arith.maxnumf, and an arith.select on the mask %m between the combined
// value and %arg2.
875func.func @masked_float_max_outerprod_scalable(%arg0: vector<[2]xf32>, %arg1: f32, %arg2: vector<[2]xf32>, %m: vector<[2]xi1>) -> vector<[2]xf32> {
876  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<maxnumf>} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32>
877  return %0 : vector<[2]xf32>
878}
879
880// CHECK-LABEL:   func.func @masked_float_max_outerprod_scalable(
881// CHECK-SAME:                                                   %[[VAL_0:.*]]: vector<[2]xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<[2]xf32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xf32> {
882// CHECK:           %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<[2]xf32>
883// CHECK:           %[[VAL_9:.*]] = arith.maxnumf %[[VAL_8]], %[[VAL_2]] : vector<[2]xf32>
884// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xf32>
885
886// -----
887
// Masked vector-by-scalar outerproduct on f32 with kind = minnumf and
// accumulator %arg2. The expectations that follow require an arith.mulf on
// %arg0, an arith.minnumf of that product with %arg2, and an arith.select on
// the mask %m between the combined value and %arg2.
888func.func @masked_float_min_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: vector<2xf32>, %m: vector<2xi1>) -> vector<2xf32> {
889  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<minnumf>} : vector<2xf32>, f32 } : vector<2xi1> -> vector<2xf32>
890  return %0 : vector<2xf32>
891}
892
893// CHECK-LABEL:   func.func @masked_float_min_outerprod(
894// CHECK-SAME:                                          %[[VAL_0:.*]]: vector<2xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<2xf32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xf32> {
895// CHECK:           %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<2xf32>
896// CHECK:           %[[VAL_9:.*]] = arith.minnumf %[[VAL_8]], %[[VAL_2]] : vector<2xf32>
897// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xf32>
898
899// -----
900
// Scalable-vector (vector<[2]xf32>) variant of the masked f32 outerproduct
// with kind = minnumf. The expectations that follow require arith.mulf,
// arith.minnumf, and an arith.select on the mask %m between the combined
// value and %arg2.
901func.func @masked_float_min_outerprod_scalable(%arg0: vector<[2]xf32>, %arg1: f32, %arg2: vector<[2]xf32>, %m: vector<[2]xi1>) -> vector<[2]xf32> {
902  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<minnumf>} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32>
903  return %0 : vector<[2]xf32>
904}
905
906// CHECK-LABEL:   func.func @masked_float_min_outerprod_scalable(
907// CHECK-SAME:                                                   %[[VAL_0:.*]]: vector<[2]xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<[2]xf32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xf32> {
908// CHECK:           %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<[2]xf32>
909// CHECK:           %[[VAL_9:.*]] = arith.minnumf %[[VAL_8]], %[[VAL_2]] : vector<[2]xf32>
910// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xf32>
911
912// -----
913
// Masked vector-by-scalar outerproduct on i32 with kind = add and accumulator
// %arg2. The expectations that follow require an arith.muli on %arg0, an
// arith.addi of that product with %arg2, and an arith.select on the mask %m
// between the combined value and %arg2.
914func.func @masked_int_add_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vector<2xi32>, %m: vector<2xi1>) -> vector<2xi32> {
915  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<add>} : vector<2xi32>, i32 } : vector<2xi1> -> vector<2xi32>
916  return %0 : vector<2xi32>
917}
918
919// CHECK-LABEL:   func.func @masked_int_add_outerprod(
920// CHECK-SAME:                                        %[[VAL_0:.*]]: vector<2xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<2xi32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xi32> {
921// CHECK:           %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<2xi32>
922// CHECK:           %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] : vector<2xi32>
923// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32>
924
925// -----
926
// Scalable-vector (vector<[2]xi32>) variant of the masked i32 outerproduct
// with kind = add. The expectations that follow require arith.muli,
// arith.addi, and an arith.select on the mask %m between the combined value
// and %arg2.
927func.func @masked_int_add_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> {
928  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<add>} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32>
929  return %0 : vector<[2]xi32>
930}
931
932// CHECK-LABEL:   func.func @masked_int_add_outerprod_scalable(
933// CHECK-SAME:                                                 %[[VAL_0:.*]]: vector<[2]xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<[2]xi32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xi32> {
934// CHECK:           %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<[2]xi32>
935// CHECK:           %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] : vector<[2]xi32>
936// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xi32>
937
938// -----
939
// Masked vector-by-scalar outerproduct on i32 with kind = mul and accumulator
// %arg2. The expectations that follow require an arith.muli on %arg0, a
// second arith.muli combining that product with %arg2, and an arith.select on
// the mask %m between the combined value and %arg2.
940func.func @masked_int_mul_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vector<2xi32>, %m: vector<2xi1>) -> vector<2xi32> {
941  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<mul>} : vector<2xi32>, i32 } : vector<2xi1> -> vector<2xi32>
942  return %0 : vector<2xi32>
943}
944
945// CHECK-LABEL:   func.func @masked_int_mul_outerprod(
946// CHECK-SAME:                                        %[[VAL_0:.*]]: vector<2xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<2xi32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xi32> {
947// CHECK:           %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<2xi32>
948// CHECK:           %[[VAL_9:.*]] = arith.muli %[[VAL_8]], %[[VAL_2]] : vector<2xi32>
949// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32>
950
951// -----
952
// Scalable-vector (vector<[2]xi32>) variant of the masked i32 outerproduct
// with kind = mul. The expectations that follow require two arith.muli ops
// and an arith.select on the mask %m between the combined value and %arg2.
953func.func @masked_int_mul_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> {
954  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<mul>} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32>
955  return %0 : vector<[2]xi32>
956}
957
958// CHECK-LABEL:   func.func @masked_int_mul_outerprod_scalable(
959// CHECK-SAME:                                                 %[[VAL_0:.*]]: vector<[2]xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<[2]xi32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xi32> {
960// CHECK:           %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<[2]xi32>
961// CHECK:           %[[VAL_9:.*]] = arith.muli %[[VAL_8]], %[[VAL_2]] : vector<[2]xi32>
962// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xi32>
963
964// -----
965
// Masked vector-by-scalar outerproduct on i32 with kind = maxsi and
// accumulator %arg2. The expectations that follow require an arith.muli on
// %arg0, an arith.maxsi of that product with %arg2, and an arith.select on
// the mask %m between the combined value and %arg2.
966func.func @masked_int_max_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vector<2xi32>, %m: vector<2xi1>) -> vector<2xi32> {
967  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<maxsi>} : vector<2xi32>, i32 } : vector<2xi1> -> vector<2xi32>
968  return %0 : vector<2xi32>
969}
970
971// CHECK-LABEL:   func.func @masked_int_max_outerprod(
972// CHECK-SAME:                                        %[[VAL_0:.*]]: vector<2xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<2xi32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xi32> {
973// CHECK:           %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<2xi32>
974// CHECK:           %[[VAL_9:.*]] = arith.maxsi %[[VAL_8]], %[[VAL_2]] : vector<2xi32>
975// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32>
976
977// -----
978
// Scalable-vector (vector<[2]xi32>) variant of the masked i32 outerproduct
// with kind = maxsi. The expectations that follow require arith.muli,
// arith.maxsi, and an arith.select on the mask %m between the combined value
// and %arg2.
979func.func @masked_int_max_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> {
980  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<maxsi>} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32>
981  return %0 : vector<[2]xi32>
982}
983
984// CHECK-LABEL:   func.func @masked_int_max_outerprod_scalable(
985// CHECK-SAME:                                                 %[[VAL_0:.*]]: vector<[2]xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<[2]xi32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xi32> {
986// CHECK:           %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<[2]xi32>
987// CHECK:           %[[VAL_9:.*]] = arith.maxsi %[[VAL_8]], %[[VAL_2]] : vector<[2]xi32>
988// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xi32>
989
990// -----
991
// Masked vector-by-scalar outerproduct on i32 with kind = minui (the unsigned
// minimum, unlike the signed maxsi used by the max test) and accumulator
// %arg2. The expectations that follow require an arith.muli on %arg0, an
// arith.minui of that product with %arg2, and an arith.select on the mask %m
// between the combined value and %arg2.
992func.func @masked_int_min_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vector<2xi32>, %m: vector<2xi1>) -> vector<2xi32> {
993  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<minui>} : vector<2xi32>, i32 } : vector<2xi1> -> vector<2xi32>
994  return %0 : vector<2xi32>
995}
996
997// CHECK-LABEL:   func.func @masked_int_min_outerprod(
998// CHECK-SAME:                                        %[[VAL_0:.*]]: vector<2xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<2xi32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xi32> {
999// CHECK:           %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<2xi32>
1000// CHECK:           %[[VAL_9:.*]] = arith.minui %[[VAL_8]], %[[VAL_2]] : vector<2xi32>
1001// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32>
1002
1003// -----
1004
// Scalable-vector (vector<[2]xi32>) variant of the masked i32 outerproduct
// with kind = minui. The expectations that follow require arith.muli,
// arith.minui, and an arith.select on the mask %m between the combined value
// and %arg2.
1005func.func @masked_int_min_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> {
1006  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<minui>} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32>
1007  return %0 : vector<[2]xi32>
1008}
1009
1010// CHECK-LABEL:   func.func @masked_int_min_outerprod_scalable(
1011// CHECK-SAME:                                                 %[[VAL_0:.*]]: vector<[2]xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<[2]xi32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xi32> {
1012// CHECK:           %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<[2]xi32>
1013// CHECK:           %[[VAL_9:.*]] = arith.minui %[[VAL_8]], %[[VAL_2]] : vector<[2]xi32>
1014// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xi32>
1015
1016// -----
1017
// Masked outerproduct (vector<2xi32> x i32 scalar, kind = and). Expected
// output: arith.muli, arith.andi with the accumulator, then arith.select on
// the mask with the accumulator as the fallback value.
1018func.func @masked_int_and_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vector<2xi32>, %m: vector<2xi1>) -> vector<2xi32> {
1019  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<and>} : vector<2xi32>, i32 } : vector<2xi1> -> vector<2xi32>
1020  return %0 : vector<2xi32>
1021}
1022
1023// CHECK-LABEL:   func.func @masked_int_and_outerprod(
1024// CHECK-SAME:                                        %[[VAL_0:.*]]: vector<2xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<2xi32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xi32> {
1025// CHECK:           %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<2xi32>
1026// CHECK:           %[[VAL_9:.*]] = arith.andi %[[VAL_8]], %[[VAL_2]] : vector<2xi32>
1027// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32>
1028
1029// -----
1030
// Scalable-vector (vector<[2]xi32>) variant of the masked `and` outerproduct:
// the same muli / andi / select sequence is expected on scalable types.
1031func.func @masked_int_and_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> {
1032  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<and>} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32>
1033  return %0 : vector<[2]xi32>
1034}
1035
1036// CHECK-LABEL:   func.func @masked_int_and_outerprod_scalable(
1037// CHECK-SAME:                                                 %[[VAL_0:.*]]: vector<[2]xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<[2]xi32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xi32> {
1038// CHECK:           %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<[2]xi32>
1039// CHECK:           %[[VAL_9:.*]] = arith.andi %[[VAL_8]], %[[VAL_2]] : vector<[2]xi32>
1040// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xi32>
1041
1042// -----
1043
// Masked outerproduct (vector<2xi32> x i32 scalar, kind = or). Expected
// output: arith.muli, arith.ori with the accumulator, then arith.select on
// the mask with the accumulator as the fallback value.
1044func.func @masked_int_or_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vector<2xi32>, %m: vector<2xi1>) -> vector<2xi32> {
1045  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<or>} : vector<2xi32>, i32 } : vector<2xi1> -> vector<2xi32>
1046  return %0 : vector<2xi32>
1047}
1048
1049// CHECK-LABEL:   func.func @masked_int_or_outerprod(
1050// CHECK-SAME:                                       %[[VAL_0:.*]]: vector<2xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<2xi32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xi32> {
1051// CHECK:           %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<2xi32>
1052// CHECK:           %[[VAL_9:.*]] = arith.ori %[[VAL_8]], %[[VAL_2]] : vector<2xi32>
1053// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32>
1054
1055// -----
1056
// Scalable-vector (vector<[2]xi32>) variant of the masked `or` outerproduct:
// the same muli / ori / select sequence is expected on scalable types.
// The CHECK-LABEL ends with `(` like its sibling tests so that it anchors on
// the complete function name rather than on a prefix of it.
1057func.func @masked_int_or_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> {
1058  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<or>} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32>
1059  return %0 : vector<[2]xi32>
1060}
1061
1062// CHECK-LABEL:   func.func @masked_int_or_outerprod_scalable(
1063// CHECK-SAME:                                       %[[VAL_0:.*]]: vector<[2]xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<[2]xi32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xi32> {
1064// CHECK:           %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<[2]xi32>
1065// CHECK:           %[[VAL_9:.*]] = arith.ori %[[VAL_8]], %[[VAL_2]] : vector<[2]xi32>
1066// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xi32>
1067
1068// -----
1069
1070//===----------------------------------------------------------------------===//
1071// vector.shuffle
1072//===----------------------------------------------------------------------===//
1073
// Shuffle of a 0-D vector with itself. The CHECK lines expect the vector<f32>
// operand to be cast to vector<1xf32> and that cast value to be used for both
// llvm.shufflevector operands, keeping the [0, 1, 0] mask.
1074func.func @shuffle_0D_direct(%arg0: vector<f32>) -> vector<3xf32> {
1075  %1 = vector.shuffle %arg0, %arg0 [0, 1, 0] : vector<f32>, vector<f32>
1076  return %1 : vector<3xf32>
1077}
1078// CHECK-LABEL: @shuffle_0D_direct(
1079//  CHECK-SAME:     %[[A:.*]]: vector<f32>
1080//       CHECK:   %[[c:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<f32> to vector<1xf32>
1081//       CHECK:   %[[s:.*]] = llvm.shufflevector %[[c]], %[[c]] [0, 1, 0] : vector<1xf32>
1082//       CHECK:   return %[[s]] : vector<3xf32>
1083
1084// -----
1085
// Shuffle whose mask [0, 1] selects exactly the first operand. The test
// expects no shuffle to remain: the function returns its first argument.
// The final CHECK *uses* the captured %[[A]] (rather than re-defining it with
// `%[[A:.*]]`, which would match any returned value).
1086func.func @shuffle_1D_direct(%arg0: vector<2xf32>, %arg1: vector<2xf32>) -> vector<2xf32> {
1087  %1 = vector.shuffle %arg0, %arg1 [0, 1] : vector<2xf32>, vector<2xf32>
1088  return %1 : vector<2xf32>
1089}
1090// CHECK-LABEL: @shuffle_1D_direct(
1091// CHECK-SAME: %[[A:.*]]: vector<2xf32>,
1092// CHECK-SAME: %[[B:.*]]: vector<2xf32>)
1093//       CHECK:   return %[[A]] : vector<2xf32>
1094
1095// -----
1096
// Same as the f32 identity-shuffle test but with `index` elements: the mask
// [0, 1] selects exactly the first operand, so the function is expected to
// return its first argument. The final CHECK *uses* the captured %[[A]]
// (rather than re-defining it with `%[[A:.*]]`, which would match any value).
1097func.func @shuffle_1D_index_direct(%arg0: vector<2xindex>, %arg1: vector<2xindex>) -> vector<2xindex> {
1098  %1 = vector.shuffle %arg0, %arg1 [0, 1] : vector<2xindex>, vector<2xindex>
1099  return %1 : vector<2xindex>
1100}
1101// CHECK-LABEL: @shuffle_1D_index_direct(
1102// CHECK-SAME: %[[A:.*]]: vector<2xindex>,
1103// CHECK-SAME: %[[B:.*]]: vector<2xindex>)
1104//       CHECK:   return %[[A]] : vector<2xindex>
1105
1106// -----
1107
// Shuffle whose mask contains -1 entries (the "poison" entries, going by the
// test name). The CHECK expects the mask, including the -1 entries, to be
// carried over unchanged onto llvm.shufflevector.
1108func.func @shuffle_poison_mask(%arg0: vector<2xf32>, %arg1: vector<2xf32>) -> vector<4xf32> {
1109  %1 = vector.shuffle %arg0, %arg1 [0, -1, 3, -1] : vector<2xf32>, vector<2xf32>
1110  return %1 : vector<4xf32>
1111}
1112// CHECK-LABEL: @shuffle_poison_mask(
1113//  CHECK-SAME:   %[[A:.*]]: vector<2xf32>, %[[B:.*]]: vector<2xf32>)
1114//       CHECK:     %[[s:.*]] = llvm.shufflevector %[[A]], %[[B]] [0, -1, 3, -1] : vector<2xf32>
1115
1116// -----
1117
// 1-D shuffle of operands with different lengths (2 and 3 elements). The CHECK
// lines expect an element-by-element expansion: starting from an undef
// vector<5xf32>, each result lane is produced by an llvm.extractelement from
// the selected operand followed by an llvm.insertelement into the result.
// Mask [4, 3, 2, 1, 0] maps to B[2], B[1], B[0], A[1], A[0].
1118func.func @shuffle_1D(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<5xf32> {
1119  %1 = vector.shuffle %arg0, %arg1 [4, 3, 2, 1, 0] : vector<2xf32>, vector<3xf32>
1120  return %1 : vector<5xf32>
1121}
1122// CHECK-LABEL: @shuffle_1D(
1123// CHECK-SAME: %[[A:.*]]: vector<2xf32>,
1124// CHECK-SAME: %[[B:.*]]: vector<3xf32>)
1125//       CHECK:   %[[U0:.*]] = llvm.mlir.undef : vector<5xf32>
1126//       CHECK:   %[[C2:.*]] = llvm.mlir.constant(2 : index) : i64
1127//       CHECK:   %[[E1:.*]] = llvm.extractelement %[[B]][%[[C2]] : i64] : vector<3xf32>
1128//       CHECK:   %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
1129//       CHECK:   %[[I1:.*]] = llvm.insertelement %[[E1]], %[[U0]][%[[C0]] : i64] : vector<5xf32>
1130//       CHECK:   %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
1131//       CHECK:   %[[E2:.*]] = llvm.extractelement %[[B]][%[[C1]] : i64] : vector<3xf32>
1132//       CHECK:   %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
1133//       CHECK:   %[[I2:.*]] = llvm.insertelement %[[E2]], %[[I1]][%[[C1]] : i64] : vector<5xf32>
1134//       CHECK:   %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
1135//       CHECK:   %[[E3:.*]] = llvm.extractelement %[[B]][%[[C0]] : i64] : vector<3xf32>
1136//       CHECK:   %[[C2:.*]] = llvm.mlir.constant(2 : index) : i64
1137//       CHECK:   %[[I3:.*]] = llvm.insertelement %[[E3]], %[[I2]][%[[C2]] : i64] : vector<5xf32>
1138//       CHECK:   %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
1139//       CHECK:   %[[E4:.*]] = llvm.extractelement %[[A]][%[[C1]] : i64] : vector<2xf32>
1140//       CHECK:   %[[C3:.*]] = llvm.mlir.constant(3 : index) : i64
1141//       CHECK:   %[[I4:.*]] = llvm.insertelement %[[E4]], %[[I3]][%[[C3]] : i64] : vector<5xf32>
1142//       CHECK:   %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
1143//       CHECK:   %[[E5:.*]] = llvm.extractelement %[[A]][%[[C0]] : i64] : vector<2xf32>
1144//       CHECK:   %[[C4:.*]] = llvm.mlir.constant(4 : index) : i64
1145//       CHECK:   %[[I5:.*]] = llvm.insertelement %[[E5]], %[[I4]][%[[C4]] : i64] : vector<5xf32>
1146//       CHECK:   return %[[I5]] : vector<5xf32>
1147
1148// -----
1149
// 2-D shuffle along the leading dimension. The CHECK lines expect both
// operands to be cast to !llvm.array types and the result to be assembled with
// llvm.extractvalue / llvm.insertvalue pairs (one per mask entry), then cast
// back to vector<3x4xf32>. The two operand casts may appear in either order
// (CHECK-DAG).
1150func.func @shuffle_2D(%a: vector<1x4xf32>, %b: vector<2x4xf32>) -> vector<3x4xf32> {
1151  %1 = vector.shuffle %a, %b[1, 0, 2] : vector<1x4xf32>, vector<2x4xf32>
1152  return %1 : vector<3x4xf32>
1153}
1154// CHECK-LABEL: @shuffle_2D(
1155// CHECK-SAME: %[[A:.*]]: vector<1x4xf32>,
1156// CHECK-SAME: %[[B:.*]]: vector<2x4xf32>)
1157//       CHECK-DAG:   %[[VAL_0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<1x4xf32> to !llvm.array<1 x vector<4xf32>>
1158//       CHECK-DAG:   %[[VAL_1:.*]] = builtin.unrealized_conversion_cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
1159//       CHECK:   %[[U0:.*]] = llvm.mlir.undef : !llvm.array<3 x vector<4xf32>>
1160//       CHECK:   %[[E1:.*]] = llvm.extractvalue %[[VAL_1]][0] : !llvm.array<2 x vector<4xf32>>
1161//       CHECK:   %[[I1:.*]] = llvm.insertvalue %[[E1]], %[[U0]][0] : !llvm.array<3 x vector<4xf32>>
1162//       CHECK:   %[[E2:.*]] = llvm.extractvalue %[[VAL_0]][0] : !llvm.array<1 x vector<4xf32>>
1163//       CHECK:   %[[I2:.*]] = llvm.insertvalue %[[E2]], %[[I1]][1] : !llvm.array<3 x vector<4xf32>>
1164//       CHECK:   %[[E3:.*]] = llvm.extractvalue %[[VAL_1]][1] : !llvm.array<2 x vector<4xf32>>
1165//       CHECK:   %[[I3:.*]] = llvm.insertvalue %[[E3]], %[[I2]][2] : !llvm.array<3 x vector<4xf32>>
1166//       CHECK:   %[[VAL_3:.*]] = builtin.unrealized_conversion_cast %[[I3]] : !llvm.array<3 x vector<4xf32>> to vector<3x4xf32>
1167//       CHECK:   return %[[VAL_3]] : vector<3x4xf32>
1168
1169// -----
1170
1171//===----------------------------------------------------------------------===//
1172// vector.extractelement
1173//===----------------------------------------------------------------------===//
1174
// vector.extractelement on a 0-D vector (no index operand). Expected output:
// a constant 0 index and an llvm.extractelement on a vector<1xf32>.
1175func.func @extractelement_from_vec_0d_f32(%arg0: vector<f32>) -> f32 {
1176  %1 = vector.extractelement %arg0[] : vector<f32>
1177  return %1 : f32
1178}
1179// CHECK-LABEL: @extractelement_from_vec_0d_f32
1180//       CHECK:   %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
1181//       CHECK:   llvm.extractelement %{{.*}}[%[[C0]] : {{.*}}] : vector<1xf32>
1182
1183// -----
1184
// vector.extractelement with an i32 position. The CHECK lines expect the i32
// constant to be used directly as the llvm.extractelement index.
1185func.func @extractelement_from_vec_1d_f32_idx_as_i32(%arg0: vector<16xf32>) -> f32 {
1186  %0 = arith.constant 15 : i32
1187  %1 = vector.extractelement %arg0[%0 : i32]: vector<16xf32>
1188  return %1 : f32
1189}
1190// CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_i32(
1191//  CHECK-SAME:   %[[A:.*]]: vector<16xf32>)
1192//       CHECK:   %[[C:.*]] = arith.constant 15 : i32
1193//       CHECK:   %[[X:.*]] = llvm.extractelement %[[A]][%[[C]] : i32] : vector<16xf32>
1194//       CHECK:   return %[[X]] : f32
1195
1196// -----
1197
// Scalable-vector (vector<[16]xf32>) variant of the i32-position
// vector.extractelement test; the i32 constant is expected to be used directly
// as the llvm.extractelement index.
1198func.func @extractelement_from_vec_1d_f32_idx_as_i32_scalable(%arg0: vector<[16]xf32>) -> f32 {
1199  %0 = arith.constant 15 : i32
1200  %1 = vector.extractelement %arg0[%0 : i32]: vector<[16]xf32>
1201  return %1 : f32
1202}
1203// CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_i32_scalable(
1204//  CHECK-SAME:   %[[A:.*]]: vector<[16]xf32>)
1205//       CHECK:   %[[C:.*]] = arith.constant 15 : i32
1206//       CHECK:   %[[X:.*]] = llvm.extractelement %[[A]][%[[C]] : i32] : vector<[16]xf32>
1207//       CHECK:   return %[[X]] : f32
1208
1209// -----
// vector.extractelement with an `index` position. The CHECK lines expect the
// index constant to go through an unrealized_conversion_cast to i64 before it
// is used as the llvm.extractelement index.
1210func.func @extractelement_from_vec_1d_f32_idx_as_index(%arg0: vector<16xf32>) -> f32 {
1211  %0 = arith.constant 15 : index
1212  %1 = vector.extractelement %arg0[%0 : index]: vector<16xf32>
1213  return %1 : f32
1214}
1215// CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_index(
1216//  CHECK-SAME:   %[[A:.*]]: vector<16xf32>)
1217//       CHECK:   %[[C:.*]] = arith.constant 15 : index
1218//       CHECK:   %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64
1219//       CHECK:   %[[X:.*]] = llvm.extractelement %[[A]][%[[I]] : i64] : vector<16xf32>
1220//       CHECK:   return %[[X]] : f32
1221
1222// -----
1223
// Scalable-vector (vector<[16]xf32>) variant of the index-position
// vector.extractelement test; the index constant is expected to be cast to i64
// before it is used as the llvm.extractelement index.
1224func.func @extractelement_from_vec_1d_f32_idx_as_index_scalable(%arg0: vector<[16]xf32>) -> f32 {
1225  %0 = arith.constant 15 : index
1226  %1 = vector.extractelement %arg0[%0 : index]: vector<[16]xf32>
1227  return %1 : f32
1228}
1229// CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_index_scalable(
1230//  CHECK-SAME:   %[[A:.*]]: vector<[16]xf32>)
1231//       CHECK:   %[[C:.*]] = arith.constant 15 : index
1232//       CHECK:   %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64
1233//       CHECK:   %[[X:.*]] = llvm.extractelement %[[A]][%[[I]] : i64] : vector<[16]xf32>
1234//       CHECK:   return %[[X]] : f32
1235
1236// -----
1237
1238//===----------------------------------------------------------------------===//
1239// vector.extract
1240//===----------------------------------------------------------------------===//
1241
// vector.extract of a scalar at static position 15 from a 1-D vector.
// Expected output: an i64 constant 15 followed by llvm.extractelement.
1242func.func @extract_scalar_from_vec_1d_f32(%arg0: vector<16xf32>) -> f32 {
1243  %0 = vector.extract %arg0[15]: f32 from vector<16xf32>
1244  return %0 : f32
1245}
1246// CHECK-LABEL: @extract_scalar_from_vec_1d_f32
1247//       CHECK:   llvm.mlir.constant(15 : i64) : i64
1248//       CHECK:   llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32>
1249//       CHECK:   return {{.*}} : f32
1250
1251// -----
1252
// vector.extract with static position -1 (the "poison" index, going by the
// test name). The CHECK lines expect the -1 to be materialized as an i64
// constant and used as the llvm.extractelement index.
1253func.func @extract_poison_idx(%arg0: vector<16xf32>) -> f32 {
1254  %0 = vector.extract %arg0[-1]: f32 from vector<16xf32>
1255  return %0 : f32
1256}
1257// CHECK-LABEL: @extract_poison_idx
1258//       CHECK:   %[[IDX:.*]] = llvm.mlir.constant(-1 : i64) : i64
1259//       CHECK:   llvm.extractelement {{.*}}[%[[IDX]] : i64] : vector<16xf32>
1260
1261// -----
1262
// Scalable-vector (vector<[16]xf32>) variant of the static-position scalar
// extract: an i64 constant 15 followed by llvm.extractelement is expected.
1263func.func @extract_scalar_from_vec_1d_f32_scalable(%arg0: vector<[16]xf32>) -> f32 {
1264  %0 = vector.extract %arg0[15]: f32 from vector<[16]xf32>
1265  return %0 : f32
1266}
1267// CHECK-LABEL: @extract_scalar_from_vec_1d_f32_scalable
1268//       CHECK:   llvm.mlir.constant(15 : i64) : i64
1269//       CHECK:   llvm.extractelement {{.*}}[{{.*}} : i64] : vector<[16]xf32>
1270//       CHECK:   return {{.*}} : f32
1271
1272// -----
1273
// vector.extract whose result type is vector<1xf32> rather than f32. The CHECK
// lines expect a scalar llvm.extractelement whose f32 result is then turned
// into vector<1xf32> by an unrealized_conversion_cast.
1274func.func @extract_vec_1e_from_vec_1d_f32(%arg0: vector<16xf32>) -> vector<1xf32> {
1275  %0 = vector.extract %arg0[15]: vector<1xf32> from vector<16xf32>
1276  return %0 : vector<1xf32>
1277}
1278// CHECK-LABEL: @extract_vec_1e_from_vec_1d_f32(
1279//  CHECK-SAME:   %[[A:.*]]: vector<16xf32>)
1280//       CHECK:   %[[T0:.*]] = llvm.mlir.constant(15 : i64) : i64
1281//       CHECK:   %[[T1:.*]] = llvm.extractelement %[[A]][%[[T0]] : i64] : vector<16xf32>
1282//       CHECK:   %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : f32 to vector<1xf32>
1283//       CHECK:   return %[[T2]] : vector<1xf32>
1284
1285// -----
1286
// Scalable-source (vector<[16]xf32>) variant of extracting a vector<1xf32>:
// a scalar llvm.extractelement followed by a cast from f32 to vector<1xf32>
// is expected.
1287func.func @extract_vec_1e_from_vec_1d_f32_scalable(%arg0: vector<[16]xf32>) -> vector<1xf32> {
1288  %0 = vector.extract %arg0[15]: vector<1xf32> from vector<[16]xf32>
1289  return %0 : vector<1xf32>
1290}
1291// CHECK-LABEL: @extract_vec_1e_from_vec_1d_f32_scalable(
1292//  CHECK-SAME:   %[[A:.*]]: vector<[16]xf32>)
1293//       CHECK:   %[[T0:.*]] = llvm.mlir.constant(15 : i64) : i64
1294//       CHECK:   %[[T1:.*]] = llvm.extractelement %[[A]][%[[T0]] : i64] : vector<[16]xf32>
1295//       CHECK:   %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : f32 to vector<1xf32>
1296//       CHECK:   return %[[T2]] : vector<1xf32>
1297
1298// -----
1299
// vector.extract from a vector of `index` elements. The CHECK lines expect the
// source to be cast to vector<16xi64>, the element to be extracted as i64, and
// the result to be cast back to `index`.
1300func.func @extract_scalar_from_vec_1d_index(%arg0: vector<16xindex>) -> index {
1301  %0 = vector.extract %arg0[15]: index from vector<16xindex>
1302  return %0 : index
1303}
1304// CHECK-LABEL: @extract_scalar_from_vec_1d_index(
1305//  CHECK-SAME:   %[[A:.*]]: vector<16xindex>)
1306//       CHECK:   %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<16xindex> to vector<16xi64>
1307//       CHECK:   %[[T1:.*]] = llvm.mlir.constant(15 : i64) : i64
1308//       CHECK:   %[[T2:.*]] = llvm.extractelement %[[T0]][%[[T1]] : i64] : vector<16xi64>
1309//       CHECK:   %[[T3:.*]] = builtin.unrealized_conversion_cast %[[T2]] : i64 to index
1310//       CHECK:   return %[[T3]] : index
1311
1312// -----
1313
// Scalable-vector (vector<[16]xindex>) variant of the `index`-element extract:
// cast to vector<[16]xi64>, extract an i64, cast the result back to `index`.
1314func.func @extract_scalar_from_vec_1d_index_scalable(%arg0: vector<[16]xindex>) -> index {
1315  %0 = vector.extract %arg0[15]: index from vector<[16]xindex>
1316  return %0 : index
1317}
1318// CHECK-LABEL: @extract_scalar_from_vec_1d_index_scalable(
1319//  CHECK-SAME:   %[[A:.*]]: vector<[16]xindex>)
1320//       CHECK:   %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<[16]xindex> to vector<[16]xi64>
1321//       CHECK:   %[[T1:.*]] = llvm.mlir.constant(15 : i64) : i64
1322//       CHECK:   %[[T2:.*]] = llvm.extractelement %[[T0]][%[[T1]] : i64] : vector<[16]xi64>
1323//       CHECK:   %[[T3:.*]] = builtin.unrealized_conversion_cast %[[T2]] : i64 to index
1324//       CHECK:   return %[[T3]] : index
1325
1326// -----
1327
// Extracting a 2-D slice from a 3-D vector with one static index. Expected
// output: a single llvm.extractvalue [0] on the nested !llvm.array type.
1328func.func @extract_vec_2d_from_vec_3d_f32(%arg0: vector<4x3x16xf32>) -> vector<3x16xf32> {
1329  %0 = vector.extract %arg0[0]: vector<3x16xf32> from vector<4x3x16xf32>
1330  return %0 : vector<3x16xf32>
1331}
1332// CHECK-LABEL: @extract_vec_2d_from_vec_3d_f32
1333//       CHECK:   llvm.extractvalue {{.*}}[0] : !llvm.array<4 x array<3 x vector<16xf32>>>
1334//       CHECK:   return {{.*}} : vector<3x16xf32>
1335
1336// -----
1337
// Variant of the 2-D-from-3-D extract with a scalable innermost dimension
// (4x3x[16]): a single llvm.extractvalue [0] on the nested array is expected.
1338func.func @extract_vec_2d_from_vec_3d_f32_scalable(%arg0: vector<4x3x[16]xf32>) -> vector<3x[16]xf32> {
1339  %0 = vector.extract %arg0[0]: vector<3x[16]xf32> from vector<4x3x[16]xf32>
1340  return %0 : vector<3x[16]xf32>
1341}
1342// CHECK-LABEL: @extract_vec_2d_from_vec_3d_f32_scalable
1343//       CHECK:   llvm.extractvalue {{.*}}[0] : !llvm.array<4 x array<3 x vector<[16]xf32>>>
1344//       CHECK:   return {{.*}} : vector<3x[16]xf32>
1345
1346// -----
1347
// Extracting a 1-D vector from a 3-D vector with two static indices. Expected
// output: a single llvm.extractvalue [0, 0] on the nested !llvm.array type.
1348func.func @extract_vec_1d_from_vec_3d_f32(%arg0: vector<4x3x16xf32>) -> vector<16xf32> {
1349  %0 = vector.extract %arg0[0, 0]: vector<16xf32> from vector<4x3x16xf32>
1350  return %0 : vector<16xf32>
1351}
1352// CHECK-LABEL: @extract_vec_1d_from_vec_3d_f32
1353//       CHECK:   llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<16xf32>>>
1354//       CHECK:   return {{.*}} : vector<16xf32>
1355
1356// -----
1357
// Variant of the 1-D-from-3-D extract with a scalable innermost dimension
// (4x3x[16]): a single llvm.extractvalue [0, 0] is expected.
1358func.func @extract_vec_1d_from_vec_3d_f32_scalable(%arg0: vector<4x3x[16]xf32>) -> vector<[16]xf32> {
1359  %0 = vector.extract %arg0[0, 0]: vector<[16]xf32> from vector<4x3x[16]xf32>
1360  return %0 : vector<[16]xf32>
1361}
1362// CHECK-LABEL: @extract_vec_1d_from_vec_3d_f32_scalable
1363//       CHECK:   llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<[16]xf32>>>
1364//       CHECK:   return {{.*}} : vector<[16]xf32>
1365
1366// -----
1367
// Extracting a scalar from a 3-D vector with three static indices. Expected
// output: llvm.extractvalue [0, 0] for the two leading indices, then an i64
// constant 0 and llvm.extractelement for the innermost index.
1368func.func @extract_scalar_from_vec_3d_f32(%arg0: vector<4x3x16xf32>) -> f32 {
1369  %0 = vector.extract %arg0[0, 0, 0]: f32 from vector<4x3x16xf32>
1370  return %0 : f32
1371}
1372// CHECK-LABEL: @extract_scalar_from_vec_3d_f32
1373//       CHECK:   llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<16xf32>>>
1374//       CHECK:   llvm.mlir.constant(0 : i64) : i64
1375//       CHECK:   llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32>
1376//       CHECK:   return {{.*}} : f32
1377
1378// -----
1379
// Variant of the scalar-from-3-D extract with a scalable innermost dimension
// (4x3x[16]): llvm.extractvalue [0, 0], then an i64 constant 0 and
// llvm.extractelement on vector<[16]xf32> are expected.
1380func.func @extract_scalar_from_vec_3d_f32_scalable(%arg0: vector<4x3x[16]xf32>) -> f32 {
1381  %0 = vector.extract %arg0[0, 0, 0]: f32 from vector<4x3x[16]xf32>
1382  return %0 : f32
1383}
1384// CHECK-LABEL: @extract_scalar_from_vec_3d_f32_scalable
1385//       CHECK:   llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<[16]xf32>>>
1386//       CHECK:   llvm.mlir.constant(0 : i64) : i64
1387//       CHECK:   llvm.extractelement {{.*}}[{{.*}} : i64] : vector<[16]xf32>
1388//       CHECK:   return {{.*}} : f32
1389
1390// -----
1391
// vector.extract from a 1-D vector with a dynamic (SSA `index`) position. The
// CHECK lines expect the index argument to be cast to i64 and used directly as
// the llvm.extractelement index.
1392func.func @extract_scalar_from_vec_1d_f32_dynamic_idx(%arg0: vector<16xf32>, %arg1: index) -> f32 {
1393  %0 = vector.extract %arg0[%arg1]: f32 from vector<16xf32>
1394  return %0 : f32
1395}
1396// CHECK-LABEL: @extract_scalar_from_vec_1d_f32_dynamic_idx
1397//  CHECK-SAME:   %[[VEC:.+]]: vector<16xf32>, %[[INDEX:.+]]: index
1398//       CHECK:   %[[UC:.+]] = builtin.unrealized_conversion_cast %[[INDEX]] : index to i64
1399//       CHECK:   llvm.extractelement %[[VEC]][%[[UC]] : i64] : vector<16xf32>
1400
1401// -----
1402
// Scalable-vector (vector<[16]xf32>) variant of the dynamic-position extract:
// the index argument is expected to be cast to i64 and used directly as the
// llvm.extractelement index.
1403func.func @extract_scalar_from_vec_1d_f32_dynamic_idx_scalable(%arg0: vector<[16]xf32>, %arg1: index) -> f32 {
1404  %0 = vector.extract %arg0[%arg1]: f32 from vector<[16]xf32>
1405  return %0 : f32
1406}
1407// CHECK-LABEL: @extract_scalar_from_vec_1d_f32_dynamic_idx_scalable
1408//  CHECK-SAME:   %[[VEC:.+]]: vector<[16]xf32>, %[[INDEX:.+]]: index
1409//       CHECK:   %[[UC:.+]] = builtin.unrealized_conversion_cast %[[INDEX]] : index to i64
1410//       CHECK:   llvm.extractelement %[[VEC]][%[[UC]] : i64] : vector<[16]xf32>
1411
1412// -----
1413
// 2-D extract with a static leading index and a dynamic trailing index. The
// CHECK lines only require that an llvm.extractvalue followed by an
// llvm.extractelement appear in the output.
1414func.func @extract_scalar_from_vec_2d_f32_inner_dynamic_idx(%arg0: vector<1x16xf32>, %arg1: index) -> f32 {
1415  %0 = vector.extract %arg0[0, %arg1]: f32 from vector<1x16xf32>
1416  return %0 : f32
1417}
1418
1419// Lowering supports extracting from multi-dim vectors with dynamic indices
1420// provided that only the trailing index is dynamic.
1421
1422// CHECK-LABEL: @extract_scalar_from_vec_2d_f32_inner_dynamic_idx(
1423//       CHECK:   llvm.extractvalue
1424//       CHECK:   llvm.extractelement
1425
// -----

// Scalable-vector (vector<1x[16]xf32>) variant of the 2-D extract with a
// static leading index and a dynamic trailing index. The CHECK lines only
// require that an llvm.extractvalue followed by an llvm.extractelement appear.
// The split marker above keeps this case in its own -split-input-file chunk,
// consistent with the other tests in this file.
1426func.func @extract_scalar_from_vec_2d_f32_inner_dynamic_idx_scalable(%arg0: vector<1x[16]xf32>, %arg1: index) -> f32 {
1427  %0 = vector.extract %arg0[0, %arg1]: f32 from vector<1x[16]xf32>
1428  return %0 : f32
1429}
1430
1431// Lowering supports extracting from multi-dim vectors with dynamic indices
1432// provided that only the trailing index is dynamic.
1433
1434// CHECK-LABEL: @extract_scalar_from_vec_2d_f32_inner_dynamic_idx_scalable(
1435//       CHECK:   llvm.extractvalue
1436//       CHECK:   llvm.extractelement
1437
1438// -----
1439
// 2-D extract with a dynamic leading index and a static trailing index. The
// CHECK expects the vector.extract op to still be present in the output.
1440func.func @extract_scalar_from_vec_2d_f32_outer_dynamic_idx(%arg0: vector<1x16xf32>, %arg1: index) -> f32 {
1441  %0 = vector.extract %arg0[%arg1, 0]: f32 from vector<1x16xf32>
1442  return %0 : f32
1443}
1444
1445// Lowering does not support extracting from multi-dim vectors with non trailing
1446// dynamic index, but it shouldn't crash.
1447
1448// CHECK-LABEL: @extract_scalar_from_vec_2d_f32_outer_dynamic_idx(
1449//       CHECK:   vector.extract
1450
// -----

// Scalable-vector (vector<1x[16]xf32>) variant of the 2-D extract with a
// dynamic leading index. The CHECK expects the vector.extract op to still be
// present in the output. The split marker above keeps this case in its own
// -split-input-file chunk, consistent with the other tests in this file.
1451func.func @extract_scalar_from_vec_2d_f32_outer_dynamic_idx_scalable(%arg0: vector<1x[16]xf32>, %arg1: index) -> f32 {
1452  %0 = vector.extract %arg0[%arg1, 0]: f32 from vector<1x[16]xf32>
1453  return %0 : f32
1454}
1455
1456// Lowering does not support extracting from multi-dim vectors with non trailing
1457// dynamic index, but it shouldn't crash.
1458
1459// CHECK-LABEL: @extract_scalar_from_vec_2d_f32_outer_dynamic_idx_scalable(
1460//       CHECK:   vector.extract
1461
1462// -----
1463
// vector.extract from a 0-D vector of `index`. The CHECK lines expect the
// source to be cast to vector<1xi64>, element 0 to be extracted as i64, and
// the result to be cast back to `index`.
1464func.func @extract_scalar_from_vec_0d_index(%arg0: vector<index>) -> index {
1465  %0 = vector.extract %arg0[]: index from vector<index>
1466  return %0 : index
1467}
1468// CHECK-LABEL: @extract_scalar_from_vec_0d_index(
1469//  CHECK-SAME:   %[[A:.*]]: vector<index>)
1470//       CHECK:   %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<index> to vector<1xi64>
1471//       CHECK:   %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64
1472//       CHECK:   %[[T2:.*]] = llvm.extractelement %[[T0]][%[[T1]] : i64] : vector<1xi64>
1473//       CHECK:   %[[T3:.*]] = builtin.unrealized_conversion_cast %[[T2]] : i64 to index
1474//       CHECK:   return %[[T3]] : index
1475
1476// -----
1477
1478//===----------------------------------------------------------------------===//
1479// vector.insertelement
1480//===----------------------------------------------------------------------===//
1481
// vector.insertelement into a 0-D vector (no index operand). Expected output:
// the destination is cast from vector<f32> to vector<1xf32>, a constant 0
// index is created, and llvm.insertelement writes the scalar at that index.
// The cast is matched by a single CHECK so that its result type must appear on
// the same line as the cast itself.
1482func.func @insertelement_into_vec_0d_f32(%arg0: f32, %arg1: vector<f32>) -> vector<f32> {
1483  %1 = vector.insertelement %arg0, %arg1[] : vector<f32>
1484  return %1 : vector<f32>
1485}
1486// CHECK-LABEL: @insertelement_into_vec_0d_f32
1487//  CHECK-SAME:   %[[A:.*]]: f32,
1488//       CHECK:   %[[B:.*]] = builtin.unrealized_conversion_cast %{{.*}} : vector<f32> to vector<1xf32>
1490//       CHECK:   %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
1491//       CHECK:   %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[C0]] : {{.*}}] : vector<1xf32>
1492
1493// -----
1494
// vector.insertelement with an i32 position. The CHECK lines expect the i32
// constant to be used directly as the llvm.insertelement index.
1495func.func @insertelement_into_vec_1d_f32_idx_as_i32(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> {
1496  %0 = arith.constant 3 : i32
1497  %1 = vector.insertelement %arg0, %arg1[%0 : i32] : vector<4xf32>
1498  return %1 : vector<4xf32>
1499}
1500// CHECK-LABEL: @insertelement_into_vec_1d_f32_idx_as_i32(
1501//  CHECK-SAME:   %[[A:.*]]: f32,
1502//  CHECK-SAME:   %[[B:.*]]: vector<4xf32>)
1503//       CHECK:   %[[C:.*]] = arith.constant 3 : i32
1504//       CHECK:   %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[C]] : i32] : vector<4xf32>
1505//       CHECK:   return %[[X]] : vector<4xf32>
1506
1507// -----
1508
// Scalable-vector (vector<[4]xf32>) variant of the i32-position
// vector.insertelement test; the i32 constant is expected to be used directly
// as the llvm.insertelement index.
1509func.func @insertelement_into_vec_1d_f32_idx_as_i32_scalable(%arg0: f32, %arg1: vector<[4]xf32>) -> vector<[4]xf32> {
1510  %0 = arith.constant 3 : i32
1511  %1 = vector.insertelement %arg0, %arg1[%0 : i32] : vector<[4]xf32>
1512  return %1 : vector<[4]xf32>
1513}
1514// CHECK-LABEL: @insertelement_into_vec_1d_f32_idx_as_i32_scalable(
1515//  CHECK-SAME:   %[[A:.*]]: f32,
1516//  CHECK-SAME:   %[[B:.*]]: vector<[4]xf32>)
1517//       CHECK:   %[[C:.*]] = arith.constant 3 : i32
1518//       CHECK:   %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[C]] : i32] : vector<[4]xf32>
1519//       CHECK:   return %[[X]] : vector<[4]xf32>
1520
1521// -----
1522
// vector.insertelement with an `index` position on a fixed-width vector<4xf32>.
// The CHECK lines expect the index constant to be cast to i64 before it is
// used as the llvm.insertelement index.
// NOTE(review): the function name contains "scalable" although the vector type
// here is fixed-width; this looks like a naming slip - confirm before renaming.
1523func.func @insertelement_into_vec_1d_f32_scalable_idx_as_index(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> {
1524  %0 = arith.constant 3 : index
1525  %1 = vector.insertelement %arg0, %arg1[%0 : index] : vector<4xf32>
1526  return %1 : vector<4xf32>
1527}
1528// CHECK-LABEL: @insertelement_into_vec_1d_f32_scalable_idx_as_index(
1529//  CHECK-SAME:   %[[A:.*]]: f32,
1530//  CHECK-SAME:   %[[B:.*]]: vector<4xf32>)
1531//       CHECK:   %[[C:.*]] = arith.constant 3 : index
1532//       CHECK:   %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64
1533//       CHECK:   %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[I]] : i64] : vector<4xf32>
1534//       CHECK:   return %[[X]] : vector<4xf32>
1535
1536// -----
1537
// Scalable-vector (vector<[4]xf32>) variant of the index-position
// vector.insertelement test; the index constant is expected to be cast to i64
// before it is used as the llvm.insertelement index.
// NOTE(review): "scalable" appears twice in the function name; the first
// occurrence looks redundant - confirm before renaming.
1538func.func @insertelement_into_vec_1d_f32_scalable_idx_as_index_scalable(%arg0: f32, %arg1: vector<[4]xf32>) -> vector<[4]xf32> {
1539  %0 = arith.constant 3 : index
1540  %1 = vector.insertelement %arg0, %arg1[%0 : index] : vector<[4]xf32>
1541  return %1 : vector<[4]xf32>
1542}
1543// CHECK-LABEL: @insertelement_into_vec_1d_f32_scalable_idx_as_index_scalable(
1544//  CHECK-SAME:   %[[A:.*]]: f32,
1545//  CHECK-SAME:   %[[B:.*]]: vector<[4]xf32>)
1546//       CHECK:   %[[C:.*]] = arith.constant 3 : index
1547//       CHECK:   %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64
1548//       CHECK:   %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[I]] : i64] : vector<[4]xf32>
1549//       CHECK:   return %[[X]] : vector<[4]xf32>
1550
1551// -----
1552
1553//===----------------------------------------------------------------------===//
1554// vector.insert
1555//===----------------------------------------------------------------------===//
1556
// vector.insert of a scalar at static position 3 into a 1-D vector. Expected
// output: an i64 constant 3 followed by llvm.insertelement.
1557func.func @insert_scalar_into_vec_1d_f32(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> {
1558  %0 = vector.insert %arg0, %arg1[3] : f32 into vector<4xf32>
1559  return %0 : vector<4xf32>
1560}
1561// CHECK-LABEL: @insert_scalar_into_vec_1d_f32
1562//       CHECK:   llvm.mlir.constant(3 : i64) : i64
1563//       CHECK:   llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32>
1564//       CHECK:   return {{.*}} : vector<4xf32>
1565
1566// -----
1567
// Scalable-vector (vector<[4]xf32>) variant of the static-position scalar
// insert: an i64 constant 3 followed by llvm.insertelement is expected.
1568func.func @insert_scalar_into_vec_1d_f32_scalable(%arg0: f32, %arg1: vector<[4]xf32>) -> vector<[4]xf32> {
1569  %0 = vector.insert %arg0, %arg1[3] : f32 into vector<[4]xf32>
1570  return %0 : vector<[4]xf32>
1571}
1572// CHECK-LABEL: @insert_scalar_into_vec_1d_f32_scalable
1573//       CHECK:   llvm.mlir.constant(3 : i64) : i64
1574//       CHECK:   llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<[4]xf32>
1575//       CHECK:   return {{.*}} : vector<[4]xf32>
1576
1577// -----
1578
// vector.insert of an `index` scalar into a vector of `index`. The CHECK lines
// expect both operands to be cast to i64-based types (in either order, via
// CHECK-DAG), an llvm.insertelement on vector<4xi64>, and a cast of the result
// back to vector<4xindex>.
1579func.func @insert_scalar_into_vec_1d_index(%arg0: index, %arg1: vector<4xindex>) -> vector<4xindex> {
1580  %0 = vector.insert %arg0, %arg1[3] : index into vector<4xindex>
1581  return %0 : vector<4xindex>
1582}
1583// CHECK-LABEL: @insert_scalar_into_vec_1d_index(
1584//  CHECK-SAME:   %[[A:.*]]: index,
1585//  CHECK-SAME:   %[[B:.*]]: vector<4xindex>)
1586//   CHECK-DAG:   %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : index to i64
1587//   CHECK-DAG:   %[[T1:.*]] = builtin.unrealized_conversion_cast %[[B]] : vector<4xindex> to vector<4xi64>
1588//       CHECK:   %[[T3:.*]] = llvm.mlir.constant(3 : i64) : i64
1589//       CHECK:   %[[T4:.*]] = llvm.insertelement %[[T0]], %[[T1]][%[[T3]] : i64] : vector<4xi64>
1590//       CHECK:   %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : vector<4xi64> to vector<4xindex>
1591//       CHECK:   return %[[T5]] : vector<4xindex>
1592
1593// -----
1594
// Scalable-vector (vector<[4]xindex>) variant of the `index` scalar insert:
// both operands are cast to i64-based types (either order), llvm.insertelement
// operates on vector<[4]xi64>, and the result is cast back to
// vector<[4]xindex>.
1595func.func @insert_scalar_into_vec_1d_index_scalable(%arg0: index, %arg1: vector<[4]xindex>) -> vector<[4]xindex> {
1596  %0 = vector.insert %arg0, %arg1[3] : index into vector<[4]xindex>
1597  return %0 : vector<[4]xindex>
1598}
1599// CHECK-LABEL: @insert_scalar_into_vec_1d_index_scalable(
1600//  CHECK-SAME:   %[[A:.*]]: index,
1601//  CHECK-SAME:   %[[B:.*]]: vector<[4]xindex>)
1602//   CHECK-DAG:   %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : index to i64
1603//   CHECK-DAG:   %[[T1:.*]] = builtin.unrealized_conversion_cast %[[B]] : vector<[4]xindex> to vector<[4]xi64>
1604//       CHECK:   %[[T3:.*]] = llvm.mlir.constant(3 : i64) : i64
1605//       CHECK:   %[[T4:.*]] = llvm.insertelement %[[T0]], %[[T1]][%[[T3]] : i64] : vector<[4]xi64>
1606//       CHECK:   %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : vector<[4]xi64> to vector<[4]xindex>
1607//       CHECK:   return %[[T5]] : vector<[4]xindex>
1608
1609// -----
1610
// Inserting a 2-D vector into a 3-D vector at one static index. Expected
// output: a single llvm.insertvalue [3] on the nested !llvm.array type.
1611func.func @insert_vec_2d_into_vec_3d_f32(%arg0: vector<8x16xf32>, %arg1: vector<4x8x16xf32>) -> vector<4x8x16xf32> {
1612  %0 = vector.insert %arg0, %arg1[3] : vector<8x16xf32> into vector<4x8x16xf32>
1613  return %0 : vector<4x8x16xf32>
1614}
1615// CHECK-LABEL: @insert_vec_2d_into_vec_3d_f32
1616//       CHECK:   llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x array<8 x vector<16xf32>>>
1617//       CHECK:   return {{.*}} : vector<4x8x16xf32>
1618
1619// -----
1620
// Variant of the 2-D-into-3-D insert with a scalable innermost dimension
// (4x8x[16]): a single llvm.insertvalue [3] on the nested array is expected.
1621func.func @insert_vec_2d_into_vec_3d_f32_scalable(%arg0: vector<8x[16]xf32>, %arg1: vector<4x8x[16]xf32>) -> vector<4x8x[16]xf32> {
1622  %0 = vector.insert %arg0, %arg1[3] : vector<8x[16]xf32> into vector<4x8x[16]xf32>
1623  return %0 : vector<4x8x[16]xf32>
1624}
1625// CHECK-LABEL: @insert_vec_2d_into_vec_3d_f32_scalable
1626//       CHECK:   llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x array<8 x vector<[16]xf32>>>
1627//       CHECK:   return {{.*}} : vector<4x8x[16]xf32>
1628
1629// -----
1630
// Inserting a 1-D vector into a 3-D vector at two static indices. Expected
// output: a single llvm.insertvalue [3, 7] on the nested !llvm.array type.
1631func.func @insert_vec_1d_into_vec_3d_f32(%arg0: vector<16xf32>, %arg1: vector<4x8x16xf32>) -> vector<4x8x16xf32> {
1632  %0 = vector.insert %arg0, %arg1[3, 7] : vector<16xf32> into vector<4x8x16xf32>
1633  return %0 : vector<4x8x16xf32>
1634}
1635// CHECK-LABEL: @insert_vec_1d_into_vec_3d_f32
1636//       CHECK:   llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>>
1637//       CHECK:   return {{.*}} : vector<4x8x16xf32>
1638
1639// -----
1640
// Variant of the 1-D-into-3-D insert with a scalable innermost dimension
// (4x8x[16]): a single llvm.insertvalue [3, 7] is expected.
1641func.func @insert_vec_1d_into_vec_3d_f32_scalable(%arg0: vector<[16]xf32>, %arg1: vector<4x8x[16]xf32>) -> vector<4x8x[16]xf32> {
1642  %0 = vector.insert %arg0, %arg1[3, 7] : vector<[16]xf32> into vector<4x8x[16]xf32>
1643  return %0 : vector<4x8x[16]xf32>
1644}
1645// CHECK-LABEL: @insert_vec_1d_into_vec_3d_f32_scalable
1646//       CHECK:   llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<[16]xf32>>>
1647//       CHECK:   return {{.*}} : vector<4x8x[16]xf32>
1648
1649// -----
1650
// Inserts an f32 scalar at static position [3, 7, 15] of a vector<4x8x16xf32>.
// The expected lowering extracts the 1-D row at [3, 7], updates element 15 and
// writes the row back.
1651func.func @insert_scalar_into_vec_3d_f32(%arg0: f32, %arg1: vector<4x8x16xf32>) -> vector<4x8x16xf32> {
1652  %0 = vector.insert %arg0, %arg1[3, 7, 15] : f32 into vector<4x8x16xf32>
1653  return %0 : vector<4x8x16xf32>
1654}
1655// CHECK-LABEL: @insert_scalar_into_vec_3d_f32
1656//       CHECK:   llvm.extractvalue {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>>
1657//       CHECK:   llvm.mlir.constant(15 : i64) : i64
1658//       CHECK:   llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<16xf32>
1659//       CHECK:   llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>>
1660//       CHECK:   return {{.*}} : vector<4x8x16xf32>
1661
1662// -----
1663
// Scalable variant: inserts an f32 scalar at static position [3, 7, 15] of a
// vector<4x8x[16]xf32>.
1664func.func @insert_scalar_into_vec_3d_f32_scalable(%arg0: f32, %arg1: vector<4x8x[16]xf32>) -> vector<4x8x[16]xf32> {
1665  %0 = vector.insert %arg0, %arg1[3, 7, 15] : f32 into vector<4x8x[16]xf32>
1666  return %0 : vector<4x8x[16]xf32>
1667}
1668// CHECK-LABEL: @insert_scalar_into_vec_3d_f32_scalable
1669//       CHECK:   llvm.extractvalue {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<[16]xf32>>>
1670//       CHECK:   llvm.mlir.constant(15 : i64) : i64
1671//       CHECK:   llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<[16]xf32>
1672//       CHECK:   llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<[16]xf32>>>
1673//       CHECK:   return {{.*}} : vector<4x8x[16]xf32>
1674
1675// -----
1676
// Inserts an f32 scalar into a 1-D vector<16xf32> at a position supplied at
// runtime through the index argument %arg2.
1677func.func @insert_scalar_into_vec_1d_f32_dynamic_idx(%arg0: vector<16xf32>, %arg1: f32, %arg2: index)
1678                                      -> vector<16xf32> {
1679  %0 = vector.insert %arg1, %arg0[%arg2]: f32 into vector<16xf32>
1680  return %0 : vector<16xf32>
1681}
1682
1683// CHECK-LABEL: @insert_scalar_into_vec_1d_f32_dynamic_idx
1684//  CHECK-SAME:   %[[DST:.+]]: vector<16xf32>, %[[SRC:.+]]: f32, %[[INDEX:.+]]: index
1685//       CHECK:   %[[UC:.+]] = builtin.unrealized_conversion_cast %[[INDEX]] : index to i64
1686//       CHECK:   llvm.insertelement %[[SRC]], %[[DST]][%[[UC]] : i64] : vector<16xf32>
1687
1688// -----
1689
// Scalable variant: inserts an f32 scalar into a vector<[16]xf32> at a position
// supplied at runtime through the index argument %arg2.
1690func.func @insert_scalar_into_vec_1d_f32_dynamic_idx_scalable(%arg0: vector<[16]xf32>, %arg1: f32, %arg2: index)
1691                                      -> vector<[16]xf32> {
1692  %0 = vector.insert %arg1, %arg0[%arg2]: f32 into vector<[16]xf32>
1693  return %0 : vector<[16]xf32>
1694}
1695
1696// CHECK-LABEL: @insert_scalar_into_vec_1d_f32_dynamic_idx_scalable
1697//  CHECK-SAME:   %[[DST:.+]]: vector<[16]xf32>, %[[SRC:.+]]: f32, %[[INDEX:.+]]: index
1698//       CHECK:   %[[UC:.+]] = builtin.unrealized_conversion_cast %[[INDEX]] : index to i64
1699//       CHECK:   llvm.insertelement %[[SRC]], %[[DST]][%[[UC]] : i64] : vector<[16]xf32>
1700
1701// -----
1702
// Inserts an f32 scalar into a 2-D vector<1x16xf32> at position [0, %idx],
// mixing a static leading index with a runtime trailing index.
1703func.func @insert_scalar_into_vec_2d_f32_dynamic_idx(%arg0: vector<1x16xf32>, %arg1: f32, %idx: index)
1704                                        -> vector<1x16xf32> {
1705  %0 = vector.insert %arg1, %arg0[0, %idx]: f32 into vector<1x16xf32>
1706  return %0 : vector<1x16xf32>
1707}
1708
1709// Multi-dim vectors are not supported but this test shouldn't crash.
1710
1711// CHECK-LABEL: @insert_scalar_into_vec_2d_f32_dynamic_idx(
1712//       CHECK:   vector.insert
1713
1714// -----
1715
// Scalable variant: inserts an f32 scalar into a vector<1x[16]xf32> at position
// [0, %idx], mixing a static leading index with a runtime trailing index.
1716func.func @insert_scalar_into_vec_2d_f32_dynamic_idx_scalable(%arg0: vector<1x[16]xf32>, %arg1: f32, %idx: index)
1717                                        -> vector<1x[16]xf32> {
1718  %0 = vector.insert %arg1, %arg0[0, %idx]: f32 into vector<1x[16]xf32>
1719  return %0 : vector<1x[16]xf32>
1720}
1721
1722// Multi-dim vectors are not supported but this test shouldn't crash.
1723
1724// CHECK-LABEL: @insert_scalar_into_vec_2d_f32_dynamic_idx_scalable(
1725//       CHECK:   vector.insert
1726
1727// -----
1728
1729//===----------------------------------------------------------------------===//
1730// vector.type_cast
1731//
1732// TODO: Add tests for vector.type_cast that would cover scalable vectors
1733//===----------------------------------------------------------------------===//
1734
// Casts a memref<8x8x8xf32> to a rank-0 memref holding one vector<8x8x8xf32>.
1735func.func @type_cast_f32(%arg0: memref<8x8x8xf32>) -> memref<vector<8x8x8xf32>> {
1736  %0 = vector.type_cast %arg0: memref<8x8x8xf32> to memref<vector<8x8x8xf32>>
1737  return %0 : memref<vector<8x8x8xf32>>
1738}
1739// CHECK-LABEL: @type_cast_f32
1740//       CHECK:   llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64)>
1741//       CHECK:   %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
1742//       CHECK:   llvm.insertvalue %[[allocated]], {{.*}}[0] : !llvm.struct<(ptr, ptr, i64)>
1743//       CHECK:   %[[aligned:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
1744//       CHECK:   llvm.insertvalue %[[aligned]], {{.*}}[1] : !llvm.struct<(ptr, ptr, i64)>
1745//       CHECK:   llvm.mlir.constant(0 : index
1746//       CHECK:   llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr, ptr, i64)>
1747
1748// NOTE: No test for scalable vectors - the input memref is fixed size.
1749
1750// -----
1751
// Same cast as the f32 test, but with `index` as the element type:
// memref<8x8x8xindex> to memref<vector<8x8x8xindex>>.
1752func.func @type_cast_index(%arg0: memref<8x8x8xindex>) -> memref<vector<8x8x8xindex>> {
1753  %0 = vector.type_cast %arg0: memref<8x8x8xindex> to memref<vector<8x8x8xindex>>
1754  return %0 : memref<vector<8x8x8xindex>>
1755}
1756// CHECK-LABEL: @type_cast_index(
1757// CHECK-SAME: %[[A:.*]]: memref<8x8x8xindex>)
1758//       CHECK:   %{{.*}} = builtin.unrealized_conversion_cast %[[A]] : memref<8x8x8xindex> to !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
1759
1760//       CHECK:   %{{.*}} = builtin.unrealized_conversion_cast %{{.*}} : !llvm.struct<(ptr, ptr, i64)> to memref<vector<8x8x8xindex>>
1761
1762// NOTE: No test for scalable vectors - the input memref is fixed size.
1763
1764// -----
1765
// vector.type_cast on memrefs annotated with memory space 3; the expected
// descriptor structs use !llvm.ptr<3> for both pointers.
1766func.func @type_cast_non_zero_addrspace(%arg0: memref<8x8x8xf32, 3>) -> memref<vector<8x8x8xf32>, 3> {
1767  %0 = vector.type_cast %arg0: memref<8x8x8xf32, 3> to memref<vector<8x8x8xf32>, 3>
1768  return %0 : memref<vector<8x8x8xf32>, 3>
1769}
1770// CHECK-LABEL: @type_cast_non_zero_addrspace
1771//       CHECK:   llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64)>
1772//       CHECK:   %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<3 x i64>, array<3 x i64>)>
1773//       CHECK:   llvm.insertvalue %[[allocated]], {{.*}}[0] : !llvm.struct<(ptr<3>, ptr<3>, i64)>
1774//       CHECK:   %[[aligned:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<3 x i64>, array<3 x i64>)>
1775//       CHECK:   llvm.insertvalue %[[aligned]], {{.*}}[1] : !llvm.struct<(ptr<3>, ptr<3>, i64)>
1776//       CHECK:   llvm.mlir.constant(0 : index
1777//       CHECK:   llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr<3>, ptr<3>, i64)>
1778
1779// NOTE: No test for scalable vectors - the input memref is fixed size.
1780
1781// -----
1782
1783//===----------------------------------------------------------------------===//
1784// vector.print
1785//===----------------------------------------------------------------------===//
1786
// Prints an i1 scalar with vector.print.
1787func.func @print_scalar_i1(%arg0: i1) {
1788  vector.print %arg0 : i1
1789  return
1790}
1791//
1792// Type "boolean" always uses zero extension.
1793//
1794// CHECK-LABEL: @print_scalar_i1(
1795// CHECK-SAME: %[[A:.*]]: i1)
1796//       CHECK: %[[S:.*]] = arith.extui %[[A]] : i1 to i64
1797//       CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
1798//       CHECK: llvm.call @printNewline() : () -> ()
1799
1800// -----
1801
// Prints a signless i4 scalar; the expected lowering sign-extends it to i64.
1802func.func @print_scalar_i4(%arg0: i4) {
1803  vector.print %arg0 : i4
1804  return
1805}
1806// CHECK-LABEL: @print_scalar_i4(
1807// CHECK-SAME: %[[A:.*]]: i4)
1808//       CHECK: %[[S:.*]] = arith.extsi %[[A]] : i4 to i64
1809//       CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
1810//       CHECK: llvm.call @printNewline() : () -> ()
1811
1812// -----
1813
// Prints a signed si4 scalar; it is first cast to signless i4 and then
// sign-extended to i64.
1814func.func @print_scalar_si4(%arg0: si4) {
1815  vector.print %arg0 : si4
1816  return
1817}
1818// CHECK-LABEL: @print_scalar_si4(
1819// CHECK-SAME: %[[A:.*]]: si4)
1820//       CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : si4 to i4
1821//       CHECK: %[[S:.*]] = arith.extsi %[[C]] : i4 to i64
1822//       CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
1823//       CHECK: llvm.call @printNewline() : () -> ()
1824
1825// -----
1826
// Prints an unsigned ui4 scalar; it is first cast to signless i4 and then
// zero-extended to i64 for @printU64.
1827func.func @print_scalar_ui4(%arg0: ui4) {
1828  vector.print %arg0 : ui4
1829  return
1830}
1831// CHECK-LABEL: @print_scalar_ui4(
1832// CHECK-SAME: %[[A:.*]]: ui4)
1833//       CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : ui4 to i4
1834//       CHECK: %[[S:.*]] = arith.extui %[[C]] : i4 to i64
1835//       CHECK: llvm.call @printU64(%[[S]]) : (i64) -> ()
1836//       CHECK: llvm.call @printNewline() : () -> ()
1837
1838// -----
1839
// Prints a signless i32 scalar; the expected lowering sign-extends it to i64.
1840func.func @print_scalar_i32(%arg0: i32) {
1841  vector.print %arg0 : i32
1842  return
1843}
1844// CHECK-LABEL: @print_scalar_i32(
1845// CHECK-SAME: %[[A:.*]]: i32)
1846//       CHECK: %[[S:.*]] = arith.extsi %[[A]] : i32 to i64
1847//       CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
1848//       CHECK: llvm.call @printNewline() : () -> ()
1849
1850// -----
1851
// Prints an unsigned ui32 scalar: the value is cast to signless i32,
// zero-extended to i64 and passed to @printU64, followed by the @printNewline
// call that the other scalar print tests in this file also verify.
1852func.func @print_scalar_ui32(%arg0: ui32) {
1853  vector.print %arg0 : ui32
1854  return
1855}
1856// CHECK-LABEL: @print_scalar_ui32(
1857// CHECK-SAME: %[[A:.*]]: ui32)
1858//       CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : ui32 to i32
1859//       CHECK: %[[S:.*]] = arith.extui %[[C]] : i32 to i64
1860//       CHECK: llvm.call @printU64(%[[S]]) : (i64) -> ()
//       CHECK: llvm.call @printNewline() : () -> ()
1861
1862// -----
1863
// Prints a signless i40 scalar (a non-power-of-two width); the expected
// lowering sign-extends it to i64.
1864func.func @print_scalar_i40(%arg0: i40) {
1865  vector.print %arg0 : i40
1866  return
1867}
1868// CHECK-LABEL: @print_scalar_i40(
1869// CHECK-SAME: %[[A:.*]]: i40)
1870//       CHECK: %[[S:.*]] = arith.extsi %[[A]] : i40 to i64
1871//       CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
1872//       CHECK: llvm.call @printNewline() : () -> ()
1873
1874// -----
1875
// Prints a signed si40 scalar; it is first cast to signless i40 and then
// sign-extended to i64.
1876func.func @print_scalar_si40(%arg0: si40) {
1877  vector.print %arg0 : si40
1878  return
1879}
1880// CHECK-LABEL: @print_scalar_si40(
1881// CHECK-SAME: %[[A:.*]]: si40)
1882//       CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : si40 to i40
1883//       CHECK: %[[S:.*]] = arith.extsi %[[C]] : i40 to i64
1884//       CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
1885//       CHECK: llvm.call @printNewline() : () -> ()
1886
1887// -----
1888
// Prints an unsigned ui40 scalar; it is first cast to signless i40 and then
// zero-extended to i64 for @printU64.
1889func.func @print_scalar_ui40(%arg0: ui40) {
1890  vector.print %arg0 : ui40
1891  return
1892}
1893// CHECK-LABEL: @print_scalar_ui40(
1894// CHECK-SAME: %[[A:.*]]: ui40)
1895//       CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : ui40 to i40
1896//       CHECK: %[[S:.*]] = arith.extui %[[C]] : i40 to i64
1897//       CHECK: llvm.call @printU64(%[[S]]) : (i64) -> ()
1898//       CHECK: llvm.call @printNewline() : () -> ()
1899
1900// -----
1901
// Prints an i64 scalar; no extension is expected, the argument goes straight
// to @printI64.
1902func.func @print_scalar_i64(%arg0: i64) {
1903  vector.print %arg0 : i64
1904  return
1905}
1906// CHECK-LABEL: @print_scalar_i64(
1907// CHECK-SAME: %[[A:.*]]: i64)
1908//       CHECK:    llvm.call @printI64(%[[A]]) : (i64) -> ()
1909//       CHECK:    llvm.call @printNewline() : () -> ()
1910
1911// -----
1912
// Prints an unsigned ui64 scalar; only a cast to signless i64 is expected
// before the @printU64 call.
1913func.func @print_scalar_ui64(%arg0: ui64) {
1914  vector.print %arg0 : ui64
1915  return
1916}
1917// CHECK-LABEL: @print_scalar_ui64(
1918// CHECK-SAME: %[[A:.*]]: ui64)
1919//       CHECK:    %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : ui64 to i64
1920//       CHECK:    llvm.call @printU64(%[[C]]) : (i64) -> ()
1921//       CHECK:    llvm.call @printNewline() : () -> ()
1922
1923// -----
1924
// Prints an `index` scalar; it is expected to be cast to i64 and printed
// through @printU64.
1925func.func @print_scalar_index(%arg0: index) {
1926  vector.print %arg0 : index
1927  return
1928}
1929// CHECK-LABEL: @print_scalar_index(
1930// CHECK-SAME: %[[A:.*]]: index)
1931//       CHECK:    %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : index to i64
1932//       CHECK:    llvm.call @printU64(%[[C]]) : (i64) -> ()
1933//       CHECK:    llvm.call @printNewline() : () -> ()
1934
1935// -----
1936
// Prints an f32 scalar; the argument is expected to go straight to @printF32.
1937func.func @print_scalar_f32(%arg0: f32) {
1938  vector.print %arg0 : f32
1939  return
1940}
1941// CHECK-LABEL: @print_scalar_f32(
1942// CHECK-SAME: %[[A:.*]]: f32)
1943//       CHECK:    llvm.call @printF32(%[[A]]) : (f32) -> ()
1944//       CHECK:    llvm.call @printNewline() : () -> ()
1945
1946// -----
1947
// Prints an f64 scalar; the argument is expected to go straight to @printF64.
1948func.func @print_scalar_f64(%arg0: f64) {
1949  vector.print %arg0 : f64
1950  return
1951}
1952// CHECK-LABEL: @print_scalar_f64(
1953// CHECK-SAME: %[[A:.*]]: f64)
1954//       CHECK:    llvm.call @printF64(%[[A]]) : (f64) -> ()
1955//       CHECK:    llvm.call @printNewline() : () -> ()
1956
1957// -----
1958
1959// CHECK-LABEL: module {
1960// CHECK: llvm.func @printString(!llvm.ptr)
1961// CHECK: llvm.mlir.global private constant @[[GLOBAL_STR:.*]]({{.*}})
1962// CHECK: @print_string
1963//       CHECK-NEXT: %[[GLOBAL_ADDR:.*]] = llvm.mlir.addressof @[[GLOBAL_STR]] : !llvm.ptr
1964//       CHECK-NEXT: %[[STR_PTR:.*]] = llvm.getelementptr %[[GLOBAL_ADDR]][0] : (!llvm.ptr) -> !llvm.ptr
1965//       CHECK-NEXT: llvm.call @printString(%[[STR_PTR]]) : (!llvm.ptr) -> ()
// Prints a string literal with the `str` form of vector.print. The directives
// above this function expect a private global holding the string and a call
// to @printString on its address.
1966func.func @print_string() {
1967  vector.print str "Hello, World!"
1968  return
1969}
1970
1971// -----
1972
1973//===----------------------------------------------------------------------===//
1974// vector.extract_strided_slice
1975//===----------------------------------------------------------------------===//
1976
// Extracts two elements starting at offset 2 (unit stride) from a vector<4xf32>;
// the expected lowering is a single shufflevector with mask [2, 3].
1977func.func @extract_strided_slice_f32_1d_from_1d(%arg0: vector<4xf32>) -> vector<2xf32> {
1978  %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>
1979  return %0 : vector<2xf32>
1980}
1981// CHECK-LABEL: @extract_strided_slice_f32_1d_from_1d
1982//  CHECK-SAME:    %[[A:.*]]: vector<4xf32>)
1983//       CHECK:    %[[T0:.*]] = llvm.shufflevector %[[A]], %[[A]] [2, 3] : vector<4xf32>
1984//       CHECK:    return %[[T0]] : vector<2xf32>
1985
1986// NOTE: For scalable vectors we could only extract vector<[4]xf32> from vector<[4]xf32>, but that would be a NOP.
1987
1988// -----
1989
// Same 1-D slice as the f32 test, but on vector<4xindex>; the expected lowering
// goes through vector<4xi64> / vector<2xi64> via unrealized conversion casts.
1990func.func @extract_strided_slice_index_1d_from_1d(%arg0: vector<4xindex>) -> vector<2xindex> {
1991  %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4xindex> to vector<2xindex>
1992  return %0 : vector<2xindex>
1993}
1994// CHECK-LABEL: @extract_strided_slice_index_1d_from_1d
1995//  CHECK-SAME:    %[[A:.*]]: vector<4xindex>)
1996//       CHECK:    %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<4xindex> to vector<4xi64>
1997//       CHECK:    %[[T2:.*]] = llvm.shufflevector %[[T0]], %[[T0]] [2, 3] : vector<4xi64>
1998//       CHECK:    %[[T3:.*]] = builtin.unrealized_conversion_cast %[[T2]] : vector<2xi64> to vector<2xindex>
1999//       CHECK:    return %[[T3]] : vector<2xindex>
2000
2001// NOTE: For scalable vectors we could only extract vector<[4]xindex> from vector<[4]xindex>, but that would be a NOP.
2002
2003// -----
2004
// Slices only the leading dimension: takes rows 2 and 3 of a vector<4x8xf32>,
// producing a vector<2x8xf32>.
2005func.func @extract_strided_slice_f32_1d_from_2d(%arg0: vector<4x8xf32>) -> vector<2x8xf32> {
2006  %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4x8xf32> to vector<2x8xf32>
2007  return %0 : vector<2x8xf32>
2008}
2009// CHECK-LABEL: @extract_strided_slice_f32_1d_from_2d(
2010//  CHECK-SAME:    %[[ARG:.*]]: vector<4x8xf32>)
2011//       CHECK:    %[[A:.*]] = builtin.unrealized_conversion_cast %[[ARG]] : vector<4x8xf32> to !llvm.array<4 x vector<8xf32>>
2012//       CHECK:    %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vector<8xf32>>
2013//       CHECK:    %[[T1:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<8xf32>>
2014//       CHECK:    %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<2 x vector<8xf32>>
2015//       CHECK:    %[[T3:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<8xf32>>
2016//       CHECK:    %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T2]][1] : !llvm.array<2 x vector<8xf32>>
2017//       CHECK:    %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : !llvm.array<2 x vector<8xf32>> to vector<2x8xf32>
2018//       CHECK:    return %[[T5]]
2019
2020// -----
2021
// Scalable variant: takes rows 2 and 3 of a vector<4x[8]xf32>; the scalable
// trailing dimension is not sliced.
2022func.func @extract_strided_slice_f32_1d_from_2d_scalable(%arg0: vector<4x[8]xf32>) -> vector<2x[8]xf32> {
2023  %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4x[8]xf32> to vector<2x[8]xf32>
2024  return %0 : vector<2x[8]xf32>
2025}
2026// CHECK-LABEL:   func.func @extract_strided_slice_f32_1d_from_2d_scalable(
2027//  CHECK-SAME:    %[[ARG:.*]]: vector<4x[8]xf32>)
2028//       CHECK:    %[[A:.*]] = builtin.unrealized_conversion_cast %[[ARG]] : vector<4x[8]xf32> to !llvm.array<4 x vector<[8]xf32>>
2029//       CHECK:    %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vector<[8]xf32>>
2030//       CHECK:    %[[T1:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<[8]xf32>>
2031//       CHECK:    %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<2 x vector<[8]xf32>>
2032//       CHECK:    %[[T3:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<[8]xf32>>
2033//       CHECK:    %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T2]][1] : !llvm.array<2 x vector<[8]xf32>>
2034//       CHECK:    %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : !llvm.array<2 x vector<[8]xf32>> to vector<2x[8]xf32>
2035//       CHECK:    return %[[T5]]
2036
2037// -----
2038
// Slices both dimensions: extracts the 2x2 window at offsets [2, 2] of a
// vector<4x8xf32>.
2039func.func @extract_strided_slice_f32_2d_from_2d(%arg0: vector<4x8xf32>) -> vector<2x2xf32> {
2040  %0 = vector.extract_strided_slice %arg0 {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x8xf32> to vector<2x2xf32>
2041  return %0 : vector<2x2xf32>
2042}
2043// CHECK-LABEL: @extract_strided_slice_f32_2d_from_2d(
2044//  CHECK-SAME:    %[[ARG:.*]]: vector<4x8xf32>)
2045//       CHECK:    %[[A:.*]] = builtin.unrealized_conversion_cast %[[ARG]] : vector<4x8xf32> to !llvm.array<4 x vector<8xf32>>
2046//       CHECK:    %[[VAL_2:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf32>
2047//       CHECK:    %[[VAL_6:.*]] = builtin.unrealized_conversion_cast %[[VAL_2]] : vector<2x2xf32> to !llvm.array<2 x vector<2xf32>>
2048//       CHECK:    %[[T2:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<8xf32>>
2049//       CHECK:    %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T2]] [2, 3] : vector<8xf32>
2050//       CHECK:    %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[VAL_6]][0] : !llvm.array<2 x vector<2xf32>>
2051//       CHECK:    %[[T5:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<8xf32>>
2052//       CHECK:    %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T5]] [2, 3] : vector<8xf32>
2053//       CHECK:    %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T4]][1] : !llvm.array<2 x vector<2xf32>>
2054//       CHECK:    %[[VAL_12:.*]] = builtin.unrealized_conversion_cast %[[T7]] : !llvm.array<2 x vector<2xf32>> to vector<2x2xf32>
2055//       CHECK:    return %[[VAL_12]] : vector<2x2xf32>
2056
2057// -----
2058
2059// NOTE: For scalable vectors, we can only extract "full" scalable dimensions
2060// (e.g. [8] from [8], but not [4] from [8]).
2061
// Scalable variant with offsets/sizes given for both dimensions: rows 2 and 3
// are taken, and the scalable dimension is extracted in full (offset 0, size 8).
2062func.func @extract_strided_slice_f32_2d_from_2d_scalable(%arg0: vector<4x[8]xf32>) -> vector<2x[8]xf32> {
2063  %0 = vector.extract_strided_slice %arg0 {offsets = [2, 0], sizes = [2, 8], strides = [1, 1]} : vector<4x[8]xf32> to vector<2x[8]xf32>
2064  return %0 : vector<2x[8]xf32>
2065}
2066// CHECK-LABEL: @extract_strided_slice_f32_2d_from_2d_scalable(
2067//  CHECK-SAME:     %[[ARG:.*]]: vector<4x[8]xf32>)
2068// CHECK:           %[[T1:.*]] = builtin.unrealized_conversion_cast %[[ARG]] : vector<4x[8]xf32> to !llvm.array<4 x vector<[8]xf32>>
2069// CHECK:           %[[T3:.*]] = arith.constant dense<0.000000e+00> : vector<2x[8]xf32>
2070// CHECK:           %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T3]] : vector<2x[8]xf32> to !llvm.array<2 x vector<[8]xf32>>
2071// CHECK:           %[[T5:.*]] = llvm.extractvalue %[[T1]][2] : !llvm.array<4 x vector<[8]xf32>>
2072// CHECK:           %[[T6:.*]] = llvm.insertvalue %[[T5]], %[[T4]][0] : !llvm.array<2 x vector<[8]xf32>>
2073// CHECK:           %[[T7:.*]] = llvm.extractvalue %[[T1]][3] : !llvm.array<4 x vector<[8]xf32>>
2074// CHECK:           %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T6]][1] : !llvm.array<2 x vector<[8]xf32>>
2075// CHECK:           %[[T9:.*]] = builtin.unrealized_conversion_cast %[[T8]] : !llvm.array<2 x vector<[8]xf32>> to vector<2x[8]xf32>
2076// CHECK:           return %[[T9]] : vector<2x[8]xf32>
2077
2078// -----
2079
2080//===----------------------------------------------------------------------===//
2081// vector.insert_strided_slice
2082//===----------------------------------------------------------------------===//
2083
// Inserts a vector<4x4xf32> into a vector<4x4x4xf32> at offsets [2, 0, 0] with
// unit strides; the source covers one whole 2-D slice of the destination.
2084func.func @insert_strided_slice_f32_2d_into_3d(%b: vector<4x4xf32>, %c: vector<4x4x4xf32>) -> vector<4x4x4xf32> {
2085  %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x4xf32> into vector<4x4x4xf32>
2086  return %0 : vector<4x4x4xf32>
2087}
2088// CHECK-LABEL: @insert_strided_slice_f32_2d_into_3d
2089//       CHECK:    llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xf32>>>
2090
2091// -----
2092
// Scalable variant: inserts a vector<4x[4]xf32> into a vector<4x4x[4]xf32> at
// offsets [2, 0, 0] with unit strides.
2093func.func @insert_strided_slice_f32_2d_into_3d_scalable(%b: vector<4x[4]xf32>, %c: vector<4x4x[4]xf32>) -> vector<4x4x[4]xf32> {
2094  %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x[4]xf32> into vector<4x4x[4]xf32>
2095  return %0 : vector<4x4x[4]xf32>
2096}
2097// CHECK-LABEL: @insert_strided_slice_f32_2d_into_3d_scalable
2098//       CHECK:    llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<[4]xf32>>>
2099
2100// -----
2101
// Same whole-slice insertion as the f32 test, with `index` elements; the
// expected LLVM array type uses vector<4xi64>.
2102func.func @insert_strided_index_slice_index_2d_into_3d(%b: vector<4x4xindex>, %c: vector<4x4x4xindex>) -> vector<4x4x4xindex> {
2103  %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x4xindex> into vector<4x4x4xindex>
2104  return %0 : vector<4x4x4xindex>
2105}
2106// CHECK-LABEL: @insert_strided_index_slice_index_2d_into_3d
2107//       CHECK:    llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xi64>>>
2108
2109// -----
2110
// Scalable variant with `index` elements: inserts a vector<4x[4]xindex> into a
// vector<4x4x[4]xindex> at offsets [2, 0, 0]; the expected LLVM array type uses
// vector<[4]xi64>.
2111func.func @insert_strided_index_slice_index_2d_into_3d_scalable(%b: vector<4x[4]xindex>, %c: vector<4x4x[4]xindex>) -> vector<4x4x[4]xindex> {
2112  %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x[4]xindex> into vector<4x4x[4]xindex>
2113  return %0 : vector<4x4x[4]xindex>
2114}
2115// CHECK-LABEL: @insert_strided_index_slice_index_2d_into_3d_scalable
2116//       CHECK:    llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<[4]xi64>>>
2117
2118// -----
2119
// Inserts a vector<2x2xf32> into a vector<4x4xf32> at offsets [2, 2] with unit
// strides, so each source row only partially overwrites a destination row.
2120func.func @insert_strided_slice_f32_2d_into_2d(%a: vector<2x2xf32>, %b: vector<4x4xf32>) -> vector<4x4xf32> {
2121  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
2122  return %0 : vector<4x4xf32>
2123}
2124
2125// CHECK-LABEL: @insert_strided_slice_f32_2d_into_2d
2126//
2127// Subvector vector<2xf32> @0 into vector<4xf32> @2
2128//       CHECK:    %[[V2_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vector<2xf32>>
2129//       CHECK:    %[[V4_0:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.array<4 x vector<4xf32>>
2130// Element @0 -> element @2
2131//       CHECK:    %[[R4_0:.*]] = llvm.shufflevector %[[V2_0]], %[[V2_0]] [0, 1, 0, 0] : vector<2xf32>
2132//       CHECK:    %[[R4_1:.*]] = llvm.shufflevector %[[R4_0]], %[[V4_0]] [4, 5, 0, 1] : vector<4xf32>
2133//       CHECK:    llvm.insertvalue %[[R4_1]], {{.*}}[2] : !llvm.array<4 x vector<4xf32>>
2134//
2135// Subvector vector<2xf32> @1 into vector<4xf32> @3
2136//       CHECK:    %[[V2_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vector<2xf32>>
2137//       CHECK:    %[[V4_3:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.array<4 x vector<4xf32>>
2138// Element @0 -> element @2
2139//       CHECK:    %[[R4_2:.*]] = llvm.shufflevector %[[V2_1]], %[[V2_1]] [0, 1, 0, 0] : vector<2xf32>
2140//       CHECK:    %[[R4_3:.*]] = llvm.shufflevector %[[R4_2]], %[[V4_3]] [4, 5, 0, 1] : vector<4xf32>
2141//       CHECK:    llvm.insertvalue %[[R4_3]], {{.*}}[3] : !llvm.array<4 x vector<4xf32>>
2142
2143// -----
2144
2145// NOTE: For scalable dimensions, the corresponding "base" size must match
2146// (i.e. we can only insert "full" scalable dimensions, e.g. [2] into [2], but
2147// not [2] into [4]).
2148
// Inserts a vector<2x[2]xf32> into a vector<4x[2]xf32> at offsets [2, 0] with
// unit strides: source rows 0 and 1 replace destination rows 2 and 3 in full.
2149func.func @insert_strided_slice_f32_2d_into_2d_scalable(%a: vector<2x[2]xf32>, %b: vector<4x[2]xf32>) -> vector<4x[2]xf32> {
2150  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 0], strides = [1, 1]} : vector<2x[2]xf32> into vector<4x[2]xf32>
2151  return %0 : vector<4x[2]xf32>
2152}
2153
2154// CHECK-LABEL:   func.func @insert_strided_slice_f32_2d_into_2d_scalable
2155// Subvector vector<[2]xf32> @0 into vector<4x[2]xf32> @2
2156// CHECK:           %[[A_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vector<[2]xf32>>
2157// Whole scalable row @0 -> row @2 (no element-level shuffle is expected)
2158// CHECK:           %[[B_UPDATED:.*]] = llvm.insertvalue %[[A_0]], {{.*}}[2] : !llvm.array<4 x vector<[2]xf32>>
2159// Subvector vector<[2]xf32> @1 into vector<4x[2]xf32> @3
2160// CHECK:           %[[A_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vector<[2]xf32>>
2161// Whole scalable row @1 -> row @3 (no element-level shuffle is expected)
2162// CHECK:           llvm.insertvalue %[[A_1]], %[[B_UPDATED]][3] : !llvm.array<4 x vector<[2]xf32>>
2163
2164// -----
2165
// Inserts a vector<2x4xf32> into a vector<16x4x8xf32> at offsets [0, 0, 2] with
// unit strides. Each 4-element source row is first widened to 8 elements by a
// shufflevector and then blended into lanes 2..5 of the matching destination
// row by a second shufflevector.
// NOTE(review): this function shares its name with an earlier test in this
// file; consider giving it a distinct name.
2166func.func @insert_strided_slice_f32_2d_into_3d(%arg0: vector<2x4xf32>, %arg1: vector<16x4x8xf32>) -> vector<16x4x8xf32> {
2167  %0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [0, 0, 2], strides = [1, 1]}:
2168        vector<2x4xf32> into vector<16x4x8xf32>
2169  return %0 : vector<16x4x8xf32>
2170}
2171// CHECK-LABEL: func @insert_strided_slice_f32_2d_into_3d
2172//       CHECK:    %[[V4_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vector<4xf32>>
2173//       CHECK:    %[[V4_0_0:.*]] = llvm.extractvalue {{.*}}[0, 0] : !llvm.array<16 x array<4 x vector<8xf32>>>
2174//       CHECK:    %[[R8_0:.*]] = llvm.shufflevector %[[V4_0]], %[[V4_0]] [0, 1, 2, 3, 0, 0, 0, 0] : vector<4xf32>
2175//       CHECK:    %[[R8_1:.*]] = llvm.shufflevector %[[R8_0]], %[[V4_0_0]] [8, 9, 0, 1, 2, 3, 14, 15] : vector<8xf32>
2176//       CHECK:    llvm.insertvalue %[[R8_1]], {{.*}}[0] : !llvm.array<4 x vector<8xf32>>
2177
2178//       CHECK:    %[[V4_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vector<4xf32>>
2179//       CHECK:    %[[V4_0_1:.*]] = llvm.extractvalue {{.*}}[0, 1] : !llvm.array<16 x array<4 x vector<8xf32>>>
2180//       CHECK:    %[[R8_2:.*]] = llvm.shufflevector %[[V4_1]], %[[V4_1]] [0, 1, 2, 3, 0, 0, 0, 0] : vector<4xf32>
2181//       CHECK:    %[[R8_3:.*]] = llvm.shufflevector %[[R8_2]], %[[V4_0_1]] [8, 9, 0, 1, 2, 3, 14, 15] : vector<8xf32>
2182//       CHECK:    llvm.insertvalue %[[R8_3]], {{.*}}[1] : !llvm.array<4 x vector<8xf32>>
2183
2184// -----
2185
2186// NOTE: For scalable dimensions, the corresponding "base" size must match
2187// (i.e. we can only insert "full" scalable dimensions, e.g. [4] into [4], but
2188// not [4] into [8]).
2189
// Inserts a vector<2x[4]xf32> into a vector<16x4x[4]xf32> at offsets [3, 2, 0]
// with unit strides: the two source rows land in rows 2 and 3 of the 2-D slice
// at leading index 3, and the scalable dimension is written in full.
2190func.func @insert_strided_slice_f32_2d_into_3d_scalable(%arg0: vector<2x[4]xf32>, %arg1: vector<16x4x[4]xf32>) -> vector<16x4x[4]xf32> {
2191  %0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [3, 2, 0], strides = [1, 1]}:
2192        vector<2x[4]xf32> into vector<16x4x[4]xf32>
2193  return %0 : vector<16x4x[4]xf32>
2194}
2195
2196// CHECK-LABEL:   func.func @insert_strided_slice_f32_2d_into_3d_scalable(
2197
2198// Subvector vector<4x[4]xf32> from vector<16x4x[4]xf32> @3
2199// CHECK:           %[[ARG_1_0:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.array<16 x array<4 x vector<[4]xf32>>>
2200
2201// Subvector vector<[4]xf32> @0 into vector<4x[4]xf32> @2
2202// CHECK:           %[[ARG_0_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vector<[4]xf32>>
2203// CHECK:           %[[B_UPDATED_0:.*]] = llvm.insertvalue %[[ARG_0_0]], %[[ARG_1_0]][2] : !llvm.array<4 x vector<[4]xf32>>
2204
2205// Subvector vector<[4]xf32> @1 into vector<4x[4]xf32> @3
2206// CHECK:           %[[ARG_0_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vector<[4]xf32>>
2207// CHECK:           %[[B_UPDATED_1:.*]] = llvm.insertvalue %[[ARG_0_1]], %[[B_UPDATED_0]][3] : !llvm.array<4 x vector<[4]xf32>>
2208
2209// Subvector vector<4x[4]xf32> into vector<16x4x[4]xf32> @3
2210// CHECK:           llvm.insertvalue %[[B_UPDATED_1]], {{.*}}[3] : !llvm.array<16 x array<4 x vector<[4]xf32>>>
2211
2212// -----
2213
2214//===----------------------------------------------------------------------===//
2215// vector.fma
2216//===----------------------------------------------------------------------===//
2217
// Exercises vector.fma on four operand shapes (1-D, 2-D, 3-D and 0-D) in one
// function. Every case is expected to end up as llvm.intr.fmuladd on a 1-D
// LLVM vector type.
2218func.func @fma(%vec_1d: vector<8xf32>, %vec_2d: vector<2x4xf32>, %vec_3d: vector<1x1x1xf32>, %vec_0d: vector<f32>) -> (vector<8xf32>, vector<2x4xf32>, vector<1x1x1xf32>, vector<f32>) {
  // 1-D case: a single fmuladd on vector<8xf32>.
2219  // CHECK-LABEL: @fma
2220  //  CHECK-SAME: %[[VEC_1D:.*]]: vector<8xf32>
2221  //  CHECK-SAME: %[[VEC_2D:.*]]: vector<2x4xf32>
2222  //  CHECK-SAME: %[[VEC_3D:.*]]: vector<1x1x1xf32>
2223  //       CHECK: %[[VEC_2D_CAST:.*]] = builtin.unrealized_conversion_cast %[[VEC_2D]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
2224  //       CHECK: llvm.intr.fmuladd
2225  //  CHECK-SAME:   (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32>
2226  %0 = vector.fma %vec_1d, %vec_1d, %vec_1d : vector<8xf32>
2227
  // 2-D case: unrolled per row. Each row is extracted three times (once per
  // operand, since all three operands are the same value), combined with
  // fmuladd on vector<4xf32>, and inserted into the result array.
2228  //       CHECK: %[[VEC_2D_00:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<4xf32>>
2229  //       CHECK: %[[VEC_2D_01:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<4xf32>>
2230  //       CHECK: %[[VEC_2D_02:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<4xf32>>
2231  //       CHECK: %[[VEC_2D_ADD_1:.*]] = llvm.intr.fmuladd(%[[VEC_2D_00]], %[[VEC_2D_01]], %[[VEC_2D_02]]) :
2232  //  CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
2233  //       CHECK: llvm.insertvalue %[[VEC_2D_ADD_1]], {{.*}}[0] : !llvm.array<2 x vector<4xf32>>
2234  //       CHECK: %[[VEC_2D_10:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<4xf32>>
2235  //       CHECK: %[[VEC_2D_11:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<4xf32>>
2236  //       CHECK: %[[VEC_2D_12:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<4xf32>>
2237  //       CHECK: %[[VEC_2D_ADD_2:.*]] = llvm.intr.fmuladd(%[[VEC_2D_10]], %[[VEC_2D_11]], %[[VEC_2D_12]]) :
2238  //  CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
2239  //       CHECK: llvm.insertvalue %[[VEC_2D_ADD_2]], {{.*}}[1] : !llvm.array<2 x vector<4xf32>>
2240  %1 = vector.fma %vec_2d, %vec_2d, %vec_2d : vector<2x4xf32>
2241
  // 3-D case (1x1x1): only the innermost fmuladd on vector<1xf32> is verified.
2242  //       CHECK: %[[C0:.*]] = llvm.intr.fmuladd
2243  //  CHECK-SAME:   (vector<1xf32>, vector<1xf32>, vector<1xf32>) -> vector<1xf32>
2244  %2 = vector.fma %vec_3d, %vec_3d, %vec_3d : vector<1x1x1xf32>
2245
  // 0-D case: the fmuladd is expected on vector<1xf32>.
2246  //       CHECK: %[[D0:.*]] = llvm.intr.fmuladd
2247  //  CHECK-SAME:   (vector<1xf32>, vector<1xf32>, vector<1xf32>) -> vector<1xf32>
2248  %3 = vector.fma %vec_0d, %vec_0d, %vec_0d : vector<f32>
2249
2250  return %0, %1, %2, %3: vector<8xf32>, vector<2x4xf32>, vector<1x1x1xf32>, vector<f32>
2251}
2252
2253// -----
2254
// Scalable-vector FMA lowering test: applies vector.fma to a 1-D vector<[8]xf32>,
// a 2-D vector<2x[4]xf32> and a 3-D vector<1x1x[1]xf32> operand.
// The expectations embedded in the body require one llvm.intr.fmuladd for the
// 1-D case, one llvm.intr.fmuladd per outer row (extractvalue/insertvalue on an
// !llvm.array) for the 2-D case, and an llvm.intr.fmuladd on vector<[1]xf32>
// for the 3-D case.
// NOTE(review): %vec_0d is declared in the signature but is not used by any op
// or returned in this function - confirm whether it is intentional.
2255func.func @fma_scalable(%vec_1d: vector<[8]xf32>, %vec_2d: vector<2x[4]xf32>, %vec_3d: vector<1x1x[1]xf32>, %vec_0d: vector<f32>) -> (vector<[8]xf32>, vector<2x[4]xf32>, vector<1x1x[1]xf32>) {
2256  // CHECK-LABEL: @fma_scalable
2257  //  CHECK-SAME: %[[VEC_1D:.*]]: vector<[8]xf32>
2258  //  CHECK-SAME: %[[VEC_2D:.*]]: vector<2x[4]xf32>
2259  //  CHECK-SAME: %[[VEC_3D:.*]]: vector<1x1x[1]xf32>
2260  //       CHECK: %[[VEC_2D_CAST:.*]] = builtin.unrealized_conversion_cast %[[VEC_2D]] : vector<2x[4]xf32> to !llvm.array<2 x vector<[4]xf32>>
2261  //       CHECK: llvm.intr.fmuladd
2262  //  CHECK-SAME:   (vector<[8]xf32>, vector<[8]xf32>, vector<[8]xf32>) -> vector<[8]xf32>
2263  %0 = vector.fma %vec_1d, %vec_1d, %vec_1d : vector<[8]xf32>
2264
2265  //       CHECK: %[[VEC_2D_00:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<[4]xf32>>
2266  //       CHECK: %[[VEC_2D_01:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<[4]xf32>>
2267  //       CHECK: %[[VEC_2D_02:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<[4]xf32>>
2268  //       CHECK: %[[VEC_2D_ADD_1:.*]] = llvm.intr.fmuladd(%[[VEC_2D_00]], %[[VEC_2D_01]], %[[VEC_2D_02]]) :
2269  //  CHECK-SAME: (vector<[4]xf32>, vector<[4]xf32>, vector<[4]xf32>) -> vector<[4]xf32>
2270  //       CHECK: llvm.insertvalue %[[VEC_2D_ADD_1]], {{.*}}[0] : !llvm.array<2 x vector<[4]xf32>>
2271  //       CHECK: %[[VEC_2D_10:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<[4]xf32>>
2272  //       CHECK: %[[VEC_2D_11:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<[4]xf32>>
2273  //       CHECK: %[[VEC_2D_12:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<[4]xf32>>
2274  //       CHECK: %[[VEC_2D_ADD_2:.*]] = llvm.intr.fmuladd(%[[VEC_2D_10]], %[[VEC_2D_11]], %[[VEC_2D_12]]) :
2275  //  CHECK-SAME: (vector<[4]xf32>, vector<[4]xf32>, vector<[4]xf32>) -> vector<[4]xf32>
2276  //       CHECK: llvm.insertvalue %[[VEC_2D_ADD_2]], {{.*}}[1] : !llvm.array<2 x vector<[4]xf32>>
2277  %1 = vector.fma %vec_2d, %vec_2d, %vec_2d : vector<2x[4]xf32>
2278
2279  //       CHECK: %[[C0:.*]] = llvm.intr.fmuladd
2280  //  CHECK-SAME:   (vector<[1]xf32>, vector<[1]xf32>, vector<[1]xf32>) -> vector<[1]xf32>
2281  %2 = vector.fma %vec_3d, %vec_3d, %vec_3d : vector<1x1x[1]xf32>
2282
2283  return %0, %1, %2: vector<[8]xf32>, vector<2x[4]xf32>, vector<1x1x[1]xf32>
2284}
2285
2286// -----
2287
2288//===----------------------------------------------------------------------===//
2289// vector.reduction
2290//===----------------------------------------------------------------------===//
2291
// <add> reduction of a 0-D vector<f32> into f32, no accumulator operand.
// Expected lowering: the operand is cast to vector<1xf32> and fed to
// llvm.intr.vector.reduce.fadd seeded with a 0.0 constant.
2292func.func @reduce_0d_f32(%arg0: vector<f32>) -> f32 {
2293  %0 = vector.reduction <add>, %arg0 : vector<f32> into f32
2294  return %0 : f32
2295}
2296// CHECK-LABEL: @reduce_0d_f32(
2297// CHECK-SAME: %[[A:.*]]: vector<f32>)
2298//      CHECK: %[[CA:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<f32> to vector<1xf32>
2299//      CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
2300//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[CA]])
2301// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f32, vector<1xf32>) -> f32
2302//      CHECK: return %[[V]] : f32
2303
2304// -----
2305
// <add> reduction of a fixed-length vector<16xf16> into f16, no accumulator.
// Expected lowering: llvm.intr.vector.reduce.fadd seeded with a 0.0 f16
// constant and fastmath<none>.
2306func.func @reduce_f16(%arg0: vector<16xf16>) -> f16 {
2307  %0 = vector.reduction <add>, %arg0 : vector<16xf16> into f16
2308  return %0 : f16
2309}
2310// CHECK-LABEL: @reduce_f16(
2311// CHECK-SAME: %[[A:.*]]: vector<16xf16>)
2312//      CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f16) : f16
2313//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
2314// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f16, vector<16xf16>) -> f16
2315//      CHECK: return %[[V]] : f16
2316
2317// -----
2318
// Scalable counterpart of @reduce_f16: <add> reduction of vector<[16]xf16>
// into f16. Expected lowering: llvm.intr.vector.reduce.fadd on the scalable
// vector type, seeded with a 0.0 f16 constant.
2319func.func @reduce_f16_scalable(%arg0: vector<[16]xf16>) -> f16 {
2320  %0 = vector.reduction <add>, %arg0 : vector<[16]xf16> into f16
2321  return %0 : f16
2322}
2323// CHECK-LABEL: @reduce_f16_scalable(
2324// CHECK-SAME: %[[A:.*]]: vector<[16]xf16>)
2325//      CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f16) : f16
2326//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
2327// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f16, vector<[16]xf16>) -> f16
2328//      CHECK: return %[[V]] : f16
2329
2330// -----
2331
// <add> reduction of a fixed-length vector<16xf32> into f32, no accumulator.
// Expected lowering: llvm.intr.vector.reduce.fadd seeded with a 0.0 f32
// constant and fastmath<none>.
2332func.func @reduce_f32(%arg0: vector<16xf32>) -> f32 {
2333  %0 = vector.reduction <add>, %arg0 : vector<16xf32> into f32
2334  return %0 : f32
2335}
2336// CHECK-LABEL: @reduce_f32(
2337// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
2338//      CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
2339//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
2340// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f32, vector<16xf32>) -> f32
2341//      CHECK: return %[[V]] : f32
2342
2343// -----
2344
// Scalable counterpart of @reduce_f32: <add> reduction of vector<[16]xf32>
// into f32. Expected lowering: llvm.intr.vector.reduce.fadd on the scalable
// vector type, seeded with a 0.0 f32 constant.
2345func.func @reduce_f32_scalable(%arg0: vector<[16]xf32>) -> f32 {
2346  %0 = vector.reduction <add>, %arg0 : vector<[16]xf32> into f32
2347  return %0 : f32
2348}
2349// CHECK-LABEL: @reduce_f32_scalable(
2350// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>)
2351//      CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
2352//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
2353// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f32, vector<[16]xf32>) -> f32
2354//      CHECK: return %[[V]] : f32
2355
2356// -----
2357
// <add> reduction of a fixed-length vector<16xf64> into f64, no accumulator.
// Expected lowering: llvm.intr.vector.reduce.fadd seeded with a 0.0 f64
// constant and fastmath<none>.
2358func.func @reduce_f64(%arg0: vector<16xf64>) -> f64 {
2359  %0 = vector.reduction <add>, %arg0 : vector<16xf64> into f64
2360  return %0 : f64
2361}
2362// CHECK-LABEL: @reduce_f64(
2363// CHECK-SAME: %[[A:.*]]: vector<16xf64>)
2364//      CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f64) : f64
2365//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
2366// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f64, vector<16xf64>) -> f64
2367//      CHECK: return %[[V]] : f64
2368
2369// -----
2370
// Scalable counterpart of @reduce_f64: <add> reduction of vector<[16]xf64>
// into f64. Expected lowering: llvm.intr.vector.reduce.fadd on the scalable
// vector type, seeded with a 0.0 f64 constant.
2371func.func @reduce_f64_scalable(%arg0: vector<[16]xf64>) -> f64 {
2372  %0 = vector.reduction <add>, %arg0 : vector<[16]xf64> into f64
2373  return %0 : f64
2374}
2375// CHECK-LABEL: @reduce_f64_scalable(
2376// CHECK-SAME: %[[A:.*]]: vector<[16]xf64>)
2377//      CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f64) : f64
2378//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
2379// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f64, vector<[16]xf64>) -> f64
2380//      CHECK: return %[[V]] : f64
2381
2382// -----
2383
// Integer <add> reduction of vector<16xi8> into i8, no accumulator.
// Expected lowering: a single llvm.intr.vector.reduce.add on the operand.
2384func.func @reduce_i8(%arg0: vector<16xi8>) -> i8 {
2385  %0 = vector.reduction <add>, %arg0 : vector<16xi8> into i8
2386  return %0 : i8
2387}
2388// CHECK-LABEL: @reduce_i8(
2389// CHECK-SAME: %[[A:.*]]: vector<16xi8>)
2390//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
2391//      CHECK: return %[[V]] : i8
2392
2393// -----
2394
// Scalable counterpart of @reduce_i8: <add> reduction of vector<[16]xi8>.
// Expected lowering: a single llvm.intr.vector.reduce.add on the operand.
2395func.func @reduce_i8_scalable(%arg0: vector<[16]xi8>) -> i8 {
2396  %0 = vector.reduction <add>, %arg0 : vector<[16]xi8> into i8
2397  return %0 : i8
2398}
2399// CHECK-LABEL: @reduce_i8_scalable(
2400// CHECK-SAME: %[[A:.*]]: vector<[16]xi8>)
2401//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
2402//      CHECK: return %[[V]] : i8
2403
2404// -----
2405
// Integer <add> reduction of vector<16xi32> into i32, no accumulator.
// Expected lowering: a single llvm.intr.vector.reduce.add on the operand.
2406func.func @reduce_i32(%arg0: vector<16xi32>) -> i32 {
2407  %0 = vector.reduction <add>, %arg0 : vector<16xi32> into i32
2408  return %0 : i32
2409}
2410// CHECK-LABEL: @reduce_i32(
2411// CHECK-SAME: %[[A:.*]]: vector<16xi32>)
2412//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
2413//      CHECK: return %[[V]] : i32
2414
2415// -----
2416
// Scalable counterpart of @reduce_i32: <add> reduction of vector<[16]xi32>.
// Expected lowering: a single llvm.intr.vector.reduce.add on the operand.
2417func.func @reduce_i32_scalable(%arg0: vector<[16]xi32>) -> i32 {
2418  %0 = vector.reduction <add>, %arg0 : vector<[16]xi32> into i32
2419  return %0 : i32
2420}
2421// CHECK-LABEL: @reduce_i32_scalable(
2422// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
2423//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
2424//      CHECK: return %[[V]] : i32
2425
2426// -----
2427
// <add> reduction of vector<16xi32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.add on the vector, then llvm.add
// combining the accumulator with the reduced value.
2428func.func @reduce_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 {
2429  %0 = vector.reduction <add>, %arg0, %arg1 : vector<16xi32> into i32
2430  return %0 : i32
2431}
2432// CHECK-LABEL: @reduce_acc_i32(
2433//  CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
2434//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
2435//       CHECK: %[[V:.*]] = llvm.add %[[ACC]], %[[R]]
2436//       CHECK: return %[[V]] : i32
2437
2438// -----
2439
// Scalable counterpart of @reduce_acc_i32: <add> reduction of vector<[16]xi32>
// with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.add followed by llvm.add with the accumulator.
2440func.func @reduce_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 {
2441  %0 = vector.reduction <add>, %arg0, %arg1 : vector<[16]xi32> into i32
2442  return %0 : i32
2443}
2444// CHECK-LABEL: @reduce_acc_i32_scalable(
2445//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
2446//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
2447//       CHECK: %[[V:.*]] = llvm.add %[[ACC]], %[[R]]
2448//       CHECK: return %[[V]] : i32
2449
2450// -----
2451
// Integer <mul> reduction of vector<16xi32> into i32, no accumulator.
// Expected lowering: a single llvm.intr.vector.reduce.mul on the operand.
2452func.func @reduce_mul_i32(%arg0: vector<16xi32>) -> i32 {
2453  %0 = vector.reduction <mul>, %arg0 : vector<16xi32> into i32
2454  return %0 : i32
2455}
2456// CHECK-LABEL: @reduce_mul_i32(
2457//  CHECK-SAME: %[[A:.*]]: vector<16xi32>)
2458//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.mul"(%[[A]])
2459//       CHECK: return %[[V]] : i32
2460
2461// -----
2462
// Scalable counterpart of @reduce_mul_i32: <mul> reduction of vector<[16]xi32>.
// Expected lowering: a single llvm.intr.vector.reduce.mul on the operand.
2463func.func @reduce_mul_i32_scalable(%arg0: vector<[16]xi32>) -> i32 {
2464  %0 = vector.reduction <mul>, %arg0 : vector<[16]xi32> into i32
2465  return %0 : i32
2466}
2467// CHECK-LABEL: @reduce_mul_i32_scalable(
2468//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
2469//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.mul"(%[[A]])
2470//       CHECK: return %[[V]] : i32
2471
2472// -----
2473
// <mul> reduction of vector<16xi32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.mul on the vector, then llvm.mul
// combining the accumulator with the reduced value.
2474func.func @reduce_mul_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 {
2475  %0 = vector.reduction <mul>, %arg0, %arg1 : vector<16xi32> into i32
2476  return %0 : i32
2477}
2478// CHECK-LABEL: @reduce_mul_acc_i32(
2479//  CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
2480//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.mul"(%[[A]])
2481//       CHECK: %[[V:.*]] = llvm.mul %[[ACC]], %[[R]]
2482//       CHECK: return %[[V]] : i32
2483
2484// -----
2485
// Scalable counterpart of @reduce_mul_acc_i32: <mul> reduction of
// vector<[16]xi32> with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.mul followed by llvm.mul with the accumulator.
2486func.func @reduce_mul_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 {
2487  %0 = vector.reduction <mul>, %arg0, %arg1 : vector<[16]xi32> into i32
2488  return %0 : i32
2489}
2490// CHECK-LABEL: @reduce_mul_acc_i32_scalable(
2491//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
2492//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.mul"(%[[A]])
2493//       CHECK: %[[V:.*]] = llvm.mul %[[ACC]], %[[R]]
2494//       CHECK: return %[[V]] : i32
2495
2496// -----
2497
// <maximumf> reduction of vector<16xf32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.fmaximum on the vector, then
// llvm.intr.maximum of the reduced value and the accumulator.
2498func.func @reduce_fmaximum_f32(%arg0: vector<16xf32>, %arg1: f32) -> f32 {
2499  %0 = vector.reduction <maximumf>, %arg0, %arg1 : vector<16xf32> into f32
2500  return %0 : f32
2501}
2502// CHECK-LABEL: @reduce_fmaximum_f32(
2503// CHECK-SAME: %[[A:.*]]: vector<16xf32>, %[[B:.*]]: f32)
2504//      CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fmaximum(%[[A]]) : (vector<16xf32>) -> f32
2505//      CHECK: %[[R:.*]] = llvm.intr.maximum(%[[V]], %[[B]]) : (f32, f32) -> f32
2506//      CHECK: return %[[R]] : f32
2507
2508// -----
2509
// Scalable counterpart of @reduce_fmaximum_f32: <maximumf> reduction of
// vector<[16]xf32> with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.fmaximum followed by llvm.intr.maximum.
2510func.func @reduce_fmaximum_f32_scalable(%arg0: vector<[16]xf32>, %arg1: f32) -> f32 {
2511  %0 = vector.reduction <maximumf>, %arg0, %arg1 : vector<[16]xf32> into f32
2512  return %0 : f32
2513}
2514// CHECK-LABEL: @reduce_fmaximum_f32_scalable(
2515// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>, %[[B:.*]]: f32)
2516//      CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fmaximum(%[[A]]) : (vector<[16]xf32>) -> f32
2517//      CHECK: %[[R:.*]] = llvm.intr.maximum(%[[V]], %[[B]]) : (f32, f32) -> f32
2518//      CHECK: return %[[R]] : f32
2519
2520// -----
2521
// <minimumf> reduction of vector<16xf32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.fminimum on the vector, then
// llvm.intr.minimum of the reduced value and the accumulator.
2522func.func @reduce_fminimum_f32(%arg0: vector<16xf32>, %arg1: f32) -> f32 {
2523  %0 = vector.reduction <minimumf>, %arg0, %arg1 : vector<16xf32> into f32
2524  return %0 : f32
2525}
2526// CHECK-LABEL: @reduce_fminimum_f32(
2527// CHECK-SAME: %[[A:.*]]: vector<16xf32>, %[[B:.*]]: f32)
2528//      CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fminimum(%[[A]]) : (vector<16xf32>) -> f32
2529//      CHECK: %[[R:.*]] = llvm.intr.minimum(%[[V]], %[[B]]) : (f32, f32) -> f32
2530//      CHECK: return %[[R]] : f32
2531
2532// -----
2533
// Scalable counterpart of @reduce_fminimum_f32: <minimumf> reduction of
// vector<[16]xf32> with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.fminimum followed by llvm.intr.minimum.
2534func.func @reduce_fminimum_f32_scalable(%arg0: vector<[16]xf32>, %arg1: f32) -> f32 {
2535  %0 = vector.reduction <minimumf>, %arg0, %arg1 : vector<[16]xf32> into f32
2536  return %0 : f32
2537}
2538// CHECK-LABEL: @reduce_fminimum_f32_scalable(
2539// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>, %[[B:.*]]: f32)
2540//      CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fminimum(%[[A]]) : (vector<[16]xf32>) -> f32
2541//      CHECK: %[[R:.*]] = llvm.intr.minimum(%[[V]], %[[B]]) : (f32, f32) -> f32
2542//      CHECK: return %[[R]] : f32
2543
2544// -----
2545
// <maxnumf> reduction of vector<16xf32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.fmax on the vector, then
// llvm.intr.maxnum of the reduced value and the accumulator.
2546func.func @reduce_fmax_f32(%arg0: vector<16xf32>, %arg1: f32) -> f32 {
2547  %0 = vector.reduction <maxnumf>, %arg0, %arg1 : vector<16xf32> into f32
2548  return %0 : f32
2549}
2550// CHECK-LABEL: @reduce_fmax_f32(
2551// CHECK-SAME: %[[A:.*]]: vector<16xf32>, %[[B:.*]]: f32)
2552//      CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fmax(%[[A]]) : (vector<16xf32>) -> f32
2553//      CHECK: %[[R:.*]] = llvm.intr.maxnum(%[[V]], %[[B]]) : (f32, f32) -> f32
2554//      CHECK: return %[[R]] : f32
2555
2556// -----
2557
// Scalable counterpart of @reduce_fmax_f32: <maxnumf> reduction of
// vector<[16]xf32> with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.fmax followed by llvm.intr.maxnum.
2558func.func @reduce_fmax_f32_scalable(%arg0: vector<[16]xf32>, %arg1: f32) -> f32 {
2559  %0 = vector.reduction <maxnumf>, %arg0, %arg1 : vector<[16]xf32> into f32
2560  return %0 : f32
2561}
2562// CHECK-LABEL: @reduce_fmax_f32_scalable(
2563// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>, %[[B:.*]]: f32)
2564//      CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fmax(%[[A]]) : (vector<[16]xf32>) -> f32
2565//      CHECK: %[[R:.*]] = llvm.intr.maxnum(%[[V]], %[[B]]) : (f32, f32) -> f32
2566//      CHECK: return %[[R]] : f32
2567
2568// -----
2569
// <minnumf> reduction of vector<16xf32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.fmin on the vector, then
// llvm.intr.minnum of the reduced value and the accumulator.
2570func.func @reduce_fmin_f32(%arg0: vector<16xf32>, %arg1: f32) -> f32 {
2571  %0 = vector.reduction <minnumf>, %arg0, %arg1 : vector<16xf32> into f32
2572  return %0 : f32
2573}
2574// CHECK-LABEL: @reduce_fmin_f32(
2575// CHECK-SAME: %[[A:.*]]: vector<16xf32>, %[[B:.*]]: f32)
2576//      CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fmin(%[[A]]) : (vector<16xf32>) -> f32
2577//      CHECK: %[[R:.*]] = llvm.intr.minnum(%[[V]], %[[B]]) : (f32, f32) -> f32
2578//      CHECK: return %[[R]] : f32
2579
2580// -----
2581
// Scalable counterpart of @reduce_fmin_f32: <minnumf> reduction of
// vector<[16]xf32> with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.fmin followed by llvm.intr.minnum.
2582func.func @reduce_fmin_f32_scalable(%arg0: vector<[16]xf32>, %arg1: f32) -> f32 {
2583  %0 = vector.reduction <minnumf>, %arg0, %arg1 : vector<[16]xf32> into f32
2584  return %0 : f32
2585}
2586// CHECK-LABEL: @reduce_fmin_f32_scalable(
2587// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>, %[[B:.*]]: f32)
2588//      CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fmin(%[[A]]) : (vector<[16]xf32>) -> f32
2589//      CHECK: %[[R:.*]] = llvm.intr.minnum(%[[V]], %[[B]]) : (f32, f32) -> f32
2590//      CHECK: return %[[R]] : f32
2591
2592// -----
2593
// Unsigned-minimum (<minui>) reduction of vector<16xi32>, no accumulator.
// Expected lowering: a single llvm.intr.vector.reduce.umin on the operand.
2594func.func @reduce_minui_i32(%arg0: vector<16xi32>) -> i32 {
2595  %0 = vector.reduction <minui>, %arg0 : vector<16xi32> into i32
2596  return %0 : i32
2597}
2598// CHECK-LABEL: @reduce_minui_i32(
2599//  CHECK-SAME: %[[A:.*]]: vector<16xi32>)
2600//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.umin"(%[[A]])
2601//       CHECK: return %[[V]] : i32
2602
2603// -----
2604
// Scalable counterpart of @reduce_minui_i32: <minui> reduction of
// vector<[16]xi32>. Expected lowering: llvm.intr.vector.reduce.umin.
2605func.func @reduce_minui_i32_scalable(%arg0: vector<[16]xi32>) -> i32 {
2606  %0 = vector.reduction <minui>, %arg0 : vector<[16]xi32> into i32
2607  return %0 : i32
2608}
2609// CHECK-LABEL: @reduce_minui_i32_scalable(
2610//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
2611//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.umin"(%[[A]])
2612//       CHECK: return %[[V]] : i32
2613
2614// -----
2615
// <minui> reduction of vector<16xi32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.umin on the vector, then an
// unsigned "ule" llvm.icmp plus llvm.select to pick between the accumulator
// and the reduced value.
2616func.func @reduce_minui_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 {
2617  %0 = vector.reduction <minui>, %arg0, %arg1 : vector<16xi32> into i32
2618  return %0 : i32
2619}
2620// CHECK-LABEL: @reduce_minui_acc_i32(
2621//  CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
2622//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.umin"(%[[A]])
2623//       CHECK: %[[S:.*]] = llvm.icmp "ule" %[[ACC]], %[[R]]
2624//       CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
2625//       CHECK: return %[[V]] : i32
2626
2627// -----
2628
// Scalable counterpart of @reduce_minui_acc_i32: <minui> reduction of
// vector<[16]xi32> with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.umin, then "ule" llvm.icmp plus llvm.select.
2629func.func @reduce_minui_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 {
2630  %0 = vector.reduction <minui>, %arg0, %arg1 : vector<[16]xi32> into i32
2631  return %0 : i32
2632}
2633// CHECK-LABEL: @reduce_minui_acc_i32_scalable(
2634//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
2635//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.umin"(%[[A]])
2636//       CHECK: %[[S:.*]] = llvm.icmp "ule" %[[ACC]], %[[R]]
2637//       CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
2638//       CHECK: return %[[V]] : i32
2639
2640// -----
2641
// Unsigned-maximum (<maxui>) reduction of vector<16xi32>, no accumulator.
// Expected lowering: a single llvm.intr.vector.reduce.umax on the operand.
2642func.func @reduce_maxui_i32(%arg0: vector<16xi32>) -> i32 {
2643  %0 = vector.reduction <maxui>, %arg0 : vector<16xi32> into i32
2644  return %0 : i32
2645}
2646// CHECK-LABEL: @reduce_maxui_i32(
2647//  CHECK-SAME: %[[A:.*]]: vector<16xi32>)
2648//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.umax"(%[[A]])
2649//       CHECK: return %[[V]] : i32
2650
2651// -----
2652
// Scalable counterpart of @reduce_maxui_i32: <maxui> reduction of
// vector<[16]xi32>. Expected lowering: llvm.intr.vector.reduce.umax.
2653func.func @reduce_maxui_i32_scalable(%arg0: vector<[16]xi32>) -> i32 {
2654  %0 = vector.reduction <maxui>, %arg0 : vector<[16]xi32> into i32
2655  return %0 : i32
2656}
2657// CHECK-LABEL: @reduce_maxui_i32_scalable(
2658//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
2659//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.umax"(%[[A]])
2660//       CHECK: return %[[V]] : i32
2661
2662// -----
2663
// <maxui> reduction of vector<16xi32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.umax on the vector, then an
// unsigned "uge" llvm.icmp plus llvm.select to pick between the accumulator
// and the reduced value.
2664func.func @reduce_maxui_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 {
2665  %0 = vector.reduction <maxui>, %arg0, %arg1 : vector<16xi32> into i32
2666  return %0 : i32
2667}
2668// CHECK-LABEL: @reduce_maxui_acc_i32(
2669//  CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
2670//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.umax"(%[[A]])
2671//       CHECK: %[[S:.*]] = llvm.icmp "uge" %[[ACC]], %[[R]]
2672//       CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
2673//       CHECK: return %[[V]] : i32
2674
2675// -----
2676
// Scalable counterpart of @reduce_maxui_acc_i32: <maxui> reduction of
// vector<[16]xi32> with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.umax, then "uge" llvm.icmp plus llvm.select.
2677func.func @reduce_maxui_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 {
2678  %0 = vector.reduction <maxui>, %arg0, %arg1 : vector<[16]xi32> into i32
2679  return %0 : i32
2680}
2681// CHECK-LABEL: @reduce_maxui_acc_i32_scalable(
2682//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
2683//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.umax"(%[[A]])
2684//       CHECK: %[[S:.*]] = llvm.icmp "uge" %[[ACC]], %[[R]]
2685//       CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
2686//       CHECK: return %[[V]] : i32
2687
2688// -----
2689
// Signed-minimum (<minsi>) reduction of vector<16xi32>, no accumulator.
// Expected lowering: a single llvm.intr.vector.reduce.smin on the operand.
2690func.func @reduce_minsi_i32(%arg0: vector<16xi32>) -> i32 {
2691  %0 = vector.reduction <minsi>, %arg0 : vector<16xi32> into i32
2692  return %0 : i32
2693}
2694// CHECK-LABEL: @reduce_minsi_i32(
2695//  CHECK-SAME: %[[A:.*]]: vector<16xi32>)
2696//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.smin"(%[[A]])
2697//       CHECK: return %[[V]] : i32
2698
2699// -----
2700
// Scalable counterpart of @reduce_minsi_i32: <minsi> reduction of
// vector<[16]xi32>. Expected lowering: llvm.intr.vector.reduce.smin.
2701func.func @reduce_minsi_i32_scalable(%arg0: vector<[16]xi32>) -> i32 {
2702  %0 = vector.reduction <minsi>, %arg0 : vector<[16]xi32> into i32
2703  return %0 : i32
2704}
2705// CHECK-LABEL: @reduce_minsi_i32_scalable(
2706//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
2707//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.smin"(%[[A]])
2708//       CHECK: return %[[V]] : i32
2709
2710// -----
2711
// <minsi> reduction of vector<16xi32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.smin on the vector, then a
// signed "sle" llvm.icmp plus llvm.select to pick between the accumulator
// and the reduced value.
2712func.func @reduce_minsi_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 {
2713  %0 = vector.reduction <minsi>, %arg0, %arg1 : vector<16xi32> into i32
2714  return %0 : i32
2715}
2716// CHECK-LABEL: @reduce_minsi_acc_i32(
2717//  CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
2718//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.smin"(%[[A]])
2719//       CHECK: %[[S:.*]] = llvm.icmp "sle" %[[ACC]], %[[R]]
2720//       CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
2721//       CHECK: return %[[V]] : i32
2722
2723// -----
2724
// Scalable counterpart of @reduce_minsi_acc_i32: <minsi> reduction of
// vector<[16]xi32> with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.smin, then "sle" llvm.icmp plus llvm.select.
2725func.func @reduce_minsi_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 {
2726  %0 = vector.reduction <minsi>, %arg0, %arg1 : vector<[16]xi32> into i32
2727  return %0 : i32
2728}
2729// CHECK-LABEL: @reduce_minsi_acc_i32_scalable(
2730//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
2731//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.smin"(%[[A]])
2732//       CHECK: %[[S:.*]] = llvm.icmp "sle" %[[ACC]], %[[R]]
2733//       CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
2734//       CHECK: return %[[V]] : i32
2735
2736// -----
2737
// Signed-maximum (<maxsi>) reduction of vector<16xi32>, no accumulator.
// Expected lowering: a single llvm.intr.vector.reduce.smax on the operand.
2738func.func @reduce_maxsi_i32(%arg0: vector<16xi32>) -> i32 {
2739  %0 = vector.reduction <maxsi>, %arg0 : vector<16xi32> into i32
2740  return %0 : i32
2741}
2742// CHECK-LABEL: @reduce_maxsi_i32(
2743//  CHECK-SAME: %[[A:.*]]: vector<16xi32>)
2744//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.smax"(%[[A]])
2745//       CHECK: return %[[V]] : i32
2746
2747// -----
2748
// Scalable counterpart of @reduce_maxsi_i32: <maxsi> reduction of
// vector<[16]xi32>. Expected lowering: llvm.intr.vector.reduce.smax.
2749func.func @reduce_maxsi_i32_scalable(%arg0: vector<[16]xi32>) -> i32 {
2750  %0 = vector.reduction <maxsi>, %arg0 : vector<[16]xi32> into i32
2751  return %0 : i32
2752}
2753// CHECK-LABEL: @reduce_maxsi_i32_scalable(
2754//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
2755//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.smax"(%[[A]])
2756//       CHECK: return %[[V]] : i32
2757
2758// -----
2759
// <maxsi> reduction of vector<16xi32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.smax on the vector, then a
// signed "sge" llvm.icmp plus llvm.select to pick between the accumulator
// and the reduced value.
2760func.func @reduce_maxsi_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 {
2761  %0 = vector.reduction <maxsi>, %arg0, %arg1 : vector<16xi32> into i32
2762  return %0 : i32
2763}
2764// CHECK-LABEL: @reduce_maxsi_acc_i32(
2765//  CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
2766//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.smax"(%[[A]])
2767//       CHECK: %[[S:.*]] = llvm.icmp "sge" %[[ACC]], %[[R]]
2768//       CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
2769//       CHECK: return %[[V]] : i32
2770
2771// -----
2772
// Scalable counterpart of @reduce_maxsi_acc_i32: <maxsi> reduction of
// vector<[16]xi32> with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.smax, then "sge" llvm.icmp plus llvm.select.
2773func.func @reduce_maxsi_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 {
2774  %0 = vector.reduction <maxsi>, %arg0, %arg1 : vector<[16]xi32> into i32
2775  return %0 : i32
2776}
2777// CHECK-LABEL: @reduce_maxsi_acc_i32_scalable(
2778//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
2779//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.smax"(%[[A]])
2780//       CHECK: %[[S:.*]] = llvm.icmp "sge" %[[ACC]], %[[R]]
2781//       CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
2782//       CHECK: return %[[V]] : i32
2783
2784// -----
2785
// Bitwise <and> reduction of vector<16xi32> into i32, no accumulator.
// Expected lowering: a single llvm.intr.vector.reduce.and on the operand.
2786func.func @reduce_and_i32(%arg0: vector<16xi32>) -> i32 {
2787  %0 = vector.reduction <and>, %arg0 : vector<16xi32> into i32
2788  return %0 : i32
2789}
2790// CHECK-LABEL: @reduce_and_i32(
2791//  CHECK-SAME: %[[A:.*]]: vector<16xi32>)
2792//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.and"(%[[A]])
2793//       CHECK: return %[[V]] : i32
2794
2795// -----
2796
// Scalable counterpart of @reduce_and_i32: <and> reduction of
// vector<[16]xi32>. Expected lowering: llvm.intr.vector.reduce.and.
2797func.func @reduce_and_i32_scalable(%arg0: vector<[16]xi32>) -> i32 {
2798  %0 = vector.reduction <and>, %arg0 : vector<[16]xi32> into i32
2799  return %0 : i32
2800}
2801// CHECK-LABEL: @reduce_and_i32_scalable(
2802//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
2803//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.and"(%[[A]])
2804//       CHECK: return %[[V]] : i32
2805
2806// -----
2807
// <and> reduction of vector<16xi32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.and on the vector, then llvm.and
// combining the accumulator with the reduced value.
2808func.func @reduce_and_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 {
2809  %0 = vector.reduction <and>, %arg0, %arg1 : vector<16xi32> into i32
2810  return %0 : i32
2811}
2812// CHECK-LABEL: @reduce_and_acc_i32(
2813//  CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
2814//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.and"(%[[A]])
2815//       CHECK: %[[V:.*]] = llvm.and %[[ACC]], %[[R]]
2816//       CHECK: return %[[V]] : i32
2817
2818// -----
2819
// Scalable counterpart of @reduce_and_acc_i32: <and> reduction of
// vector<[16]xi32> with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.and followed by llvm.and with the accumulator.
2820func.func @reduce_and_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 {
2821  %0 = vector.reduction <and>, %arg0, %arg1 : vector<[16]xi32> into i32
2822  return %0 : i32
2823}
2824// CHECK-LABEL: @reduce_and_acc_i32_scalable(
2825//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
2826//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.and"(%[[A]])
2827//       CHECK: %[[V:.*]] = llvm.and %[[ACC]], %[[R]]
2828//       CHECK: return %[[V]] : i32
2829
2830// -----
2831
// Bitwise <or> reduction of vector<16xi32> into i32, no accumulator.
// Expected lowering: a single llvm.intr.vector.reduce.or on the operand.
2832func.func @reduce_or_i32(%arg0: vector<16xi32>) -> i32 {
2833  %0 = vector.reduction <or>, %arg0 : vector<16xi32> into i32
2834  return %0 : i32
2835}
2836// CHECK-LABEL: @reduce_or_i32(
2837//  CHECK-SAME: %[[A:.*]]: vector<16xi32>)
2838//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.or"(%[[A]])
2839//       CHECK: return %[[V]] : i32
2840
2841// -----
2842
// Scalable counterpart of @reduce_or_i32: <or> reduction of
// vector<[16]xi32>. Expected lowering: llvm.intr.vector.reduce.or.
2843func.func @reduce_or_i32_scalable(%arg0: vector<[16]xi32>) -> i32 {
2844  %0 = vector.reduction <or>, %arg0 : vector<[16]xi32> into i32
2845  return %0 : i32
2846}
2847// CHECK-LABEL: @reduce_or_i32_scalable(
2848//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
2849//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.or"(%[[A]])
2850//       CHECK: return %[[V]] : i32
2851
2852// -----
2853
// <or> reduction of vector<16xi32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.or on the vector, then llvm.or
// combining the accumulator with the reduced value.
2854func.func @reduce_or_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 {
2855  %0 = vector.reduction <or>, %arg0, %arg1 : vector<16xi32> into i32
2856  return %0 : i32
2857}
2858// CHECK-LABEL: @reduce_or_acc_i32(
2859//  CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
2860//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.or"(%[[A]])
2861//       CHECK: %[[V:.*]] = llvm.or %[[ACC]], %[[R]]
2862//       CHECK: return %[[V]] : i32
2863
2864// -----
2865
// Scalable counterpart of @reduce_or_acc_i32: <or> reduction of
// vector<[16]xi32> with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.or followed by llvm.or with the accumulator.
2866func.func @reduce_or_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 {
2867  %0 = vector.reduction <or>, %arg0, %arg1 : vector<[16]xi32> into i32
2868  return %0 : i32
2869}
2870// CHECK-LABEL: @reduce_or_acc_i32_scalable(
2871//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
2872//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.or"(%[[A]])
2873//       CHECK: %[[V:.*]] = llvm.or %[[ACC]], %[[R]]
2874//       CHECK: return %[[V]] : i32
2875
2876// -----
2877
// Bitwise <xor> reduction of vector<16xi32> into i32, no accumulator.
// Expected lowering: a single llvm.intr.vector.reduce.xor on the operand.
2878func.func @reduce_xor_i32(%arg0: vector<16xi32>) -> i32 {
2879  %0 = vector.reduction <xor>, %arg0 : vector<16xi32> into i32
2880  return %0 : i32
2881}
2882// CHECK-LABEL: @reduce_xor_i32(
2883//  CHECK-SAME: %[[A:.*]]: vector<16xi32>)
2884//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.xor"(%[[A]])
2885//       CHECK: return %[[V]] : i32
2886
2887// -----
2888
// Scalable counterpart of @reduce_xor_i32: <xor> reduction of
// vector<[16]xi32>. Expected lowering: llvm.intr.vector.reduce.xor.
2889func.func @reduce_xor_i32_scalable(%arg0: vector<[16]xi32>) -> i32 {
2890  %0 = vector.reduction <xor>, %arg0 : vector<[16]xi32> into i32
2891  return %0 : i32
2892}
2893// CHECK-LABEL: @reduce_xor_i32_scalable(
2894//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
2895//       CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.xor"(%[[A]])
2896//       CHECK: return %[[V]] : i32
2897
2898// -----
2899
// <xor> reduction of vector<16xi32> with a scalar accumulator (%arg1).
// Expected lowering: llvm.intr.vector.reduce.xor on the vector, then llvm.xor
// combining the accumulator with the reduced value.
2900func.func @reduce_xor_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 {
2901  %0 = vector.reduction <xor>, %arg0, %arg1 : vector<16xi32> into i32
2902  return %0 : i32
2903}
2904// CHECK-LABEL: @reduce_xor_acc_i32(
2905//  CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
2906//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.xor"(%[[A]])
2907//       CHECK: %[[V:.*]] = llvm.xor %[[ACC]], %[[R]]
2908//       CHECK: return %[[V]] : i32
2909
2910// -----
2911
// Scalable counterpart of @reduce_xor_acc_i32: <xor> reduction of
// vector<[16]xi32> with a scalar accumulator (%arg1). Expected lowering:
// llvm.intr.vector.reduce.xor followed by llvm.xor with the accumulator.
2912func.func @reduce_xor_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 {
2913  %0 = vector.reduction <xor>, %arg0, %arg1 : vector<[16]xi32> into i32
2914  return %0 : i32
2915}
2916// CHECK-LABEL: @reduce_xor_acc_i32_scalable(
2917//  CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
2918//       CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.xor"(%[[A]])
2919//       CHECK: %[[V:.*]] = llvm.xor %[[ACC]], %[[R]]
2920//       CHECK: return %[[V]] : i32
2921
2922// -----
2923
// Integer <add> reduction of vector<16xi64> into i64, no accumulator.
// Expected lowering: a single llvm.intr.vector.reduce.add on the operand.
2924func.func @reduce_i64(%arg0: vector<16xi64>) -> i64 {
2925  %0 = vector.reduction <add>, %arg0 : vector<16xi64> into i64
2926  return %0 : i64
2927}
2928// CHECK-LABEL: @reduce_i64(
2929// CHECK-SAME: %[[A:.*]]: vector<16xi64>)
2930//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
2931//      CHECK: return %[[V]] : i64
2932
2933// -----
2934
// Scalable counterpart of @reduce_i64: <add> reduction of vector<[16]xi64>.
// Expected lowering: a single llvm.intr.vector.reduce.add on the operand.
2935func.func @reduce_i64_scalable(%arg0: vector<[16]xi64>) -> i64 {
2936  %0 = vector.reduction <add>, %arg0 : vector<[16]xi64> into i64
2937  return %0 : i64
2938}
2939// CHECK-LABEL: @reduce_i64_scalable(
2940// CHECK-SAME: %[[A:.*]]: vector<[16]xi64>)
2941//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
2942//      CHECK: return %[[V]] : i64
2943
2944// -----
2945
// <add> reduction over index elements (vector<16xindex> into index).
// Expected lowering: the operand is cast to vector<16xi64>, reduced with
// llvm.intr.vector.reduce.add, and the i64 result is cast back to index.
2946func.func @reduce_index(%arg0: vector<16xindex>) -> index {
2947  %0 = vector.reduction <add>, %arg0 : vector<16xindex> into index
2948  return %0 : index
2949}
2950// CHECK-LABEL: @reduce_index(
2951// CHECK-SAME: %[[A:.*]]: vector<16xindex>)
2952//      CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<16xindex> to vector<16xi64>
2953//      CHECK: %[[T1:.*]] = "llvm.intr.vector.reduce.add"(%[[T0]])
2954//      CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : i64 to index
2955//      CHECK: return %[[T2]] : index
2956
2957// -----
2958
// Scalable counterpart of @reduce_index: <add> reduction of
// vector<[16]xindex> into index. Expected lowering: cast to vector<[16]xi64>,
// llvm.intr.vector.reduce.add, then cast of the i64 result back to index.
2959func.func @reduce_index_scalable(%arg0: vector<[16]xindex>) -> index {
2960  %0 = vector.reduction <add>, %arg0 : vector<[16]xindex> into index
2961  return %0 : index
2962}
2963// CHECK-LABEL: @reduce_index_scalable(
2964// CHECK-SAME: %[[A:.*]]: vector<[16]xindex>)
2965//      CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<[16]xindex> to vector<[16]xi64>
2966//      CHECK: %[[T1:.*]] = "llvm.intr.vector.reduce.add"(%[[T0]])
2967//      CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : i64 to index
2968//      CHECK: return %[[T2]] : index
2969
2970// -----
2971
2972//===----------------------------------------------------------------------===//
2973// vector.matrix_multiply
2974//===----------------------------------------------------------------------===//
2975
2976//                          4x16                16x3               4x3
// Flattened matrix multiply on f64: %A holds a 4x16 matrix (64 elements),
// %B a 16x3 matrix (48 elements), and the result a 4x3 matrix (12 elements),
// as given by the lhs_rows / lhs_columns / rhs_columns attributes.
// Expected lowering: llvm.intr.matrix.multiply carrying the same three
// attributes and the same flat vector types.
2977func.func @matrix_ops(%A: vector<64xf64>, %B: vector<48xf64>) -> vector<12xf64> {
2978  %C = vector.matrix_multiply %A, %B
2979    { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32 } :
2980    (vector<64xf64>, vector<48xf64>) -> vector<12xf64>
2981  return %C: vector<12xf64>
2982}
2983// CHECK-LABEL: @matrix_ops
2984//       CHECK:   llvm.intr.matrix.multiply %{{.*}}, %{{.*}} {
2985//  CHECK-SAME: lhs_columns = 16 : i32, lhs_rows = 4 : i32, rhs_columns = 3 : i32
2986//  CHECK-SAME: } : (vector<64xf64>, vector<48xf64>) -> vector<12xf64>
2987
2988// -----
2989
// Same 4x16 by 16x3 flattened matrix multiply as @matrix_ops, but with index
// elements. Expected lowering: llvm.intr.matrix.multiply operating on the
// corresponding i64 vectors (vector<64xi64>, vector<48xi64> -> vector<12xi64>).
2990func.func @matrix_ops_index(%A: vector<64xindex>, %B: vector<48xindex>) -> vector<12xindex> {
2991  %C = vector.matrix_multiply %A, %B
2992    { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32 } :
2993    (vector<64xindex>, vector<48xindex>) -> vector<12xindex>
2994  return %C: vector<12xindex>
2995}
2996// CHECK-LABEL: @matrix_ops_index
2997//       CHECK:   llvm.intr.matrix.multiply %{{.*}}, %{{.*}} {
2998//  CHECK-SAME: lhs_columns = 16 : i32, lhs_rows = 4 : i32, rhs_columns = 3 : i32
2999//  CHECK-SAME: } : (vector<64xi64>, vector<48xi64>) -> vector<12xi64>
3000
3001// -----
3002
3003//===----------------------------------------------------------------------===//
3004// vector.constant_mask
3005//===----------------------------------------------------------------------===//
3006
// 0-D constant mask with dim size 0 (the "false" case).
// Expected output: arith.constant dense<false> : vector<i1>.
3007func.func @constant_mask_0d_f() -> vector<i1> {
3008  %0 = vector.constant_mask [0] : vector<i1>
3009  return %0 : vector<i1>
3010}
3011// CHECK-LABEL: func @constant_mask_0d_f
3012// CHECK: %[[VAL_0:.*]] = arith.constant dense<false> : vector<i1>
3013// CHECK: return %[[VAL_0]] : vector<i1>
3014
3015// -----
3016
// 0-D constant mask with dim size 1 (the "true" case).
// Expected output: arith.constant dense<true> : vector<i1>.
3017func.func @constant_mask_0d_t() -> vector<i1> {
3018  %0 = vector.constant_mask [1] : vector<i1>
3019  return %0 : vector<i1>
3020}
3021// CHECK-LABEL: func @constant_mask_0d_t
3022// CHECK: %[[VAL_0:.*]] = arith.constant dense<true> : vector<i1>
3023// CHECK: return %[[VAL_0]] : vector<i1>
3024
3025// -----
3026
// Test input: 1-D fixed-width vector.constant_mask enabling the first 4 of 8
// lanes. The expected output is a dense i1 constant with four `true` values
// followed by four `false` values.
3027func.func @constant_mask_1d() -> vector<8xi1> {
3028  %0 = vector.constant_mask [4] : vector<8xi1>
3029  return %0 : vector<8xi1>
3030}
3031// CHECK-LABEL: func @constant_mask_1d
3032// CHECK: %[[VAL_0:.*]] = arith.constant dense<[true, true, true, true, false, false, false, false]> : vector<8xi1>
3033// CHECK: return %[[VAL_0]] : vector<8xi1>
3034
3035// -----
3036
// Test input: scalable 1-D vector.constant_mask with mask size 0. The expected
// output is the splat constant `dense<false>` of type vector<[8]xi1>.
3037func.func @constant_mask_1d_scalable_all_false() -> vector<[8]xi1> {
3038  %0 = vector.constant_mask [0] : vector<[8]xi1>
3039  return %0 : vector<[8]xi1>
3040}
3041// CHECK-LABEL: func @constant_mask_1d_scalable_all_false
3042// CHECK: %[[VAL_0:.*]] = arith.constant dense<false> : vector<[8]xi1>
3043// CHECK: return %[[VAL_0]] : vector<[8]xi1>
3044
3045// -----
3046
// Test input: scalable 1-D vector.constant_mask whose mask size (8) equals the
// scalable dimension's base size. The expected output is the splat constant
// `dense<true>` of type vector<[8]xi1>.
3047func.func @constant_mask_1d_scalable_all_true() -> vector<[8]xi1> {
3048  %0 = vector.constant_mask [8] : vector<[8]xi1>
3049  return %0 : vector<[8]xi1>
3050}
3051// CHECK-LABEL: func @constant_mask_1d_scalable_all_true
3052// CHECK: %[[VAL_0:.*]] = arith.constant dense<true> : vector<[8]xi1>
3053// CHECK: return %[[VAL_0]] : vector<[8]xi1>
3054
3055// -----
3056
// Test input: 2-D vector.constant_mask [2, 2] on a 4x4 mask. The expected
// output builds one row constant [true, true, false, false] and inserts it at
// positions 0 and 1 of an all-false 4x4 value (via !llvm.array insertvalue).
3057func.func @constant_mask_2d() -> vector<4x4xi1> {
3058  %v = vector.constant_mask [2, 2] : vector<4x4xi1>
3059  return %v: vector<4x4xi1>
3060}
3061
3062// CHECK-LABEL: func @constant_mask_2d
3063// CHECK: %[[VAL_0:.*]] = arith.constant dense<[true, true, false, false]> : vector<4xi1>
3064// CHECK: %[[VAL_1:.*]] = arith.constant dense<false> : vector<4x4xi1>
3065// CHECK: %[[VAL_2:.*]] = builtin.unrealized_conversion_cast %[[VAL_1]] : vector<4x4xi1> to !llvm.array<4 x vector<4xi1>>
3066// CHECK: %[[VAL_3:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_2]][0] : !llvm.array<4 x vector<4xi1>>
3067// CHECK: %[[VAL_4:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_3]][1] : !llvm.array<4 x vector<4xi1>>
3068// CHECK: %[[VAL_5:.*]] = builtin.unrealized_conversion_cast %[[VAL_4]] : !llvm.array<4 x vector<4xi1>> to vector<4x4xi1>
3069// CHECK: return %[[VAL_5]] : vector<4x4xi1>
3070
3071// -----
3072
// Test input: 2-D vector.constant_mask [2, 4] whose trailing dimension is
// scalable. The expected output inserts an all-true vector<[4]xi1> row at
// positions 0 and 1 of an all-false vector<4x[4]xi1> value.
3073func.func @constant_mask_2d_trailing_scalable() -> vector<4x[4]xi1> {
3074  %0 = vector.constant_mask [2, 4] : vector<4x[4]xi1>
3075  return %0 : vector<4x[4]xi1>
3076}
3077// CHECK-LABEL:   func.func @constant_mask_2d_trailing_scalable
3078// CHECK:           %[[VAL_0:.*]] = arith.constant dense<true> : vector<[4]xi1>
3079// CHECK:           %[[VAL_1:.*]] = arith.constant dense<false> : vector<4x[4]xi1>
3080// CHECK:           %[[VAL_2:.*]] = builtin.unrealized_conversion_cast %[[VAL_1]] : vector<4x[4]xi1> to !llvm.array<4 x vector<[4]xi1>>
3081// CHECK:           %[[VAL_3:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_2]][0] : !llvm.array<4 x vector<[4]xi1>>
3082// CHECK:           %[[VAL_4:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_3]][1] : !llvm.array<4 x vector<[4]xi1>>
3083// CHECK:           %[[VAL_5:.*]] = builtin.unrealized_conversion_cast %[[VAL_4]] : !llvm.array<4 x vector<[4]xi1>> to vector<4x[4]xi1>
3084// CHECK:           return %[[VAL_5]] : vector<4x[4]xi1>
3085
3086// -----
3087
/// Negative test: a 2-D vector.constant_mask whose leading dimension is
/// scalable. The expected output still contains the original
/// vector.constant_mask op, i.e. the op is left unconverted.
3088/// Currently, this is not supported as generating the mask would require
3089/// unrolling the leading scalable dimension at compile time.
3090func.func @negative_constant_mask_2d_leading_scalable() -> vector<[4]x4xi1> {
3091  %0 = vector.constant_mask [4, 2] : vector<[4]x4xi1>
3092  return %0 : vector<[4]x4xi1>
3093}
3094// CHECK-LABEL:   func.func @negative_constant_mask_2d_leading_scalable
3095// CHECK:           %[[VAL_0:.*]] = vector.constant_mask [4, 2] : vector<[4]x4xi1>
3096// CHECK:           return %[[VAL_0]] : vector<[4]x4xi1>
3097
3098// -----
3099
3100//===----------------------------------------------------------------------===//
3101// vector.create_mask
3102//===----------------------------------------------------------------------===//
3103
// Test input: 0-D vector.create_mask with a runtime bound. The expected output
// casts the bound to i32, places it in a vector, and compares it (sgt) against
// a dense<0> index constant of type vector<i32>.
3104func.func @create_mask_0d(%num_elems : index) -> vector<i1> {
3105  %v = vector.create_mask %num_elems : vector<i1>
3106  return %v: vector<i1>
3107}
3108
3109// CHECK-LABEL: func @create_mask_0d
3110// CHECK-SAME: %[[NUM_ELEMS:.*]]: index
3111// CHECK:  %[[INDICES:.*]] = arith.constant dense<0> : vector<i32>
3112// CHECK:  %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
3113// CHECK:  %[[BOUNDS:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]]
3114// CHECK:  %[[BOUNDS_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOUNDS]] : vector<1xi32> to vector<i32>
3115// CHECK:  %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS_CAST]], %[[INDICES]] : vector<i32>
3116// CHECK:  return %[[RESULT]] : vector<i1>
3117
3118// -----
3119
// Test input: 1-D fixed-width vector.create_mask with a runtime bound. The
// expected output broadcasts the i32 bound (insertelement + shufflevector) and
// compares it (sgt) against the constant lane indices [0, 1, 2, 3].
3120func.func @create_mask_1d(%num_elems : index) -> vector<4xi1> {
3121  %v = vector.create_mask %num_elems : vector<4xi1>
3122  return %v: vector<4xi1>
3123}
3124
3125// CHECK-LABEL: func @create_mask_1d
3126// CHECK-SAME: %[[NUM_ELEMS:.*]]: index
3127// CHECK:  %[[INDICES:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
3128// CHECK:  %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
3129// CHECK:  %[[BOUNDS_INSERT:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]]
3130// CHECK:  %[[BOUNDS:.*]] = llvm.shufflevector %[[BOUNDS_INSERT]]
3131// CHECK:  %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS]], %[[INDICES]] : vector<4xi32>
3132// CHECK:  return %[[RESULT]] : vector<4xi1>
3133
3134// -----
3135
// Test input: 1-D scalable vector.create_mask with a runtime bound. The
// expected output takes lane indices from llvm.intr.stepvector and compares
// them (slt) against the broadcast i32 bound.
3136func.func @create_mask_1d_scalable(%num_elems : index) -> vector<[4]xi1> {
3137  %v = vector.create_mask %num_elems : vector<[4]xi1>
3138  return %v: vector<[4]xi1>
3139}
3140
3141// CHECK-LABEL: func @create_mask_1d_scalable
3142// CHECK-SAME: %[[NUM_ELEMS:.*]]: index
3143// CHECK:  %[[INDICES:.*]] = llvm.intr.stepvector : vector<[4]xi32>
3144// CHECK:  %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
3145// CHECK:  %[[BOUNDS_INSERT:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]], {{.*}} : vector<[4]xi32>
3146// CHECK:  %[[BOUNDS:.*]] = llvm.shufflevector %[[BOUNDS_INSERT]], {{.*}} : vector<[4]xi32>
3147// CHECK:  %[[RESULT:.*]] = arith.cmpi slt, %[[INDICES]], %[[BOUNDS]] : vector<[4]xi32>
3148// CHECK: return %[[RESULT]] : vector<[4]xi1>
3149
3150// -----
3151
3152//===----------------------------------------------------------------------===//
3153// vector.transpose
3154//===----------------------------------------------------------------------===//
3155
// Test input: vector.transpose of a 0-D vector with an empty permutation. The
// expected output returns the function argument directly.
3156func.func @transpose_0d(%arg0: vector<f32>) -> vector<f32> {
3157  %0 = vector.transpose %arg0, [] : vector<f32> to vector<f32>
3158  return %0 : vector<f32>
3159}
3160
3161// CHECK-LABEL: func @transpose_0d
3162// CHECK-SAME:  %[[A:.*]]: vector<f32>
3163// CHECK:       return %[[A]] : vector<f32>
3164
3165// -----
3166
3167//===----------------------------------------------------------------------===//
3168// vector.flat_transpose
3169//===----------------------------------------------------------------------===//
3170
// Test input: vector.flat_transpose of a flattened 4x4 f32 matrix. The
// expected output is an llvm.intr.matrix.transpose with rows = 4 and
// columns = 4 on vector<16xf32>.
3171func.func @flat_transpose(%arg0: vector<16xf32>) -> vector<16xf32> {
3172  %0 = vector.flat_transpose %arg0 { rows = 4: i32, columns = 4: i32 }
3173     : vector<16xf32> -> vector<16xf32>
3174  return %0 : vector<16xf32>
3175}
3176
3177// CHECK-LABEL: func @flat_transpose
3178// CHECK-SAME:  %[[A:.*]]: vector<16xf32>
3179// CHECK:       %[[T:.*]] = llvm.intr.matrix.transpose %[[A]]
3180// CHECK-SAME:      {columns = 4 : i32, rows = 4 : i32} :
3181// CHECK-SAME:      vector<16xf32> into vector<16xf32>
3182// CHECK:       return %[[T]] : vector<16xf32>
3183
3184// -----
3185
// Test input: vector.flat_transpose of a flattened 4x4 matrix of `index`
// elements. The expected output casts the operand to vector<16xi64>, applies
// llvm.intr.matrix.transpose, and casts the result back to vector<16xindex>.
3186func.func @flat_transpose_index(%arg0: vector<16xindex>) -> vector<16xindex> {
3187  %0 = vector.flat_transpose %arg0 { rows = 4: i32, columns = 4: i32 }
3188     : vector<16xindex> -> vector<16xindex>
3189  return %0 : vector<16xindex>
3190}
3191// CHECK-LABEL: func @flat_transpose_index
3192// CHECK-SAME:  %[[A:.*]]: vector<16xindex>
3193// CHECK:       %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<16xindex> to vector<16xi64>
3194// CHECK:       %[[T1:.*]] = llvm.intr.matrix.transpose %[[T0]]
3195// CHECK-SAME:      {columns = 4 : i32, rows = 4 : i32} :
3196// CHECK-SAME:      vector<16xi64> into vector<16xi64>
3197// CHECK:       %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<16xi64> to vector<16xindex>
3198// CHECK:       return %[[T2]] : vector<16xindex>
3199
3200// -----
3201
// Test input: vector.flat_transpose of a flattened 4x4 f32 matrix, expected to
// lower to llvm.intr.matrix.transpose with rows = 4 and columns = 4.
// NOTE(review): this function and its expectations are identical to the
// earlier @flat_transpose test in this section; it looks like an accidental
// duplicate - confirm whether it can be dropped or should cover another case.
3202func.func @flat_transpose(%arg0: vector<16xf32>) -> vector<16xf32> {
3203  %0 = vector.flat_transpose %arg0 { rows = 4: i32, columns = 4: i32 }
3204     : vector<16xf32> -> vector<16xf32>
3205  return %0 : vector<16xf32>
3206}
3207
3208// CHECK-LABEL: func @flat_transpose
3209// CHECK-SAME:  %[[A:.*]]: vector<16xf32>
3210// CHECK:       %[[T:.*]] = llvm.intr.matrix.transpose %[[A]]
3211// CHECK-SAME:      {columns = 4 : i32, rows = 4 : i32} :
3212// CHECK-SAME:      vector<16xf32> into vector<16xf32>
3213// CHECK:       return %[[T]] : vector<16xf32>
3214
3215// -----
3216
3217//===----------------------------------------------------------------------===//
3218// vector.load
3219//===----------------------------------------------------------------------===//
3220
// Test input: vector.load of vector<8xf32> from a 200x100 f32 memref at
// [%i, %j]. The expected output computes a linear offset (multiply by 100,
// then add), forms a getelementptr, and issues an llvm.load with alignment 4.
3221func.func @load(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<8xf32> {
3222  %0 = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<8xf32>
3223  return %0 : vector<8xf32>
3224}
3225
3226// CHECK-LABEL: func @load
3227// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
3228// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]]  : i64
3229// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}}  : i64
3230// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3231// CHECK: llvm.load %[[GEP]] {alignment = 4 : i64} : !llvm.ptr -> vector<8xf32>
3232
3233// -----
3234
// Test input: scalable-vector variant of @load (vector<[8]xf32>). The expected
// output has the same address computation and an llvm.load with alignment 4
// producing vector<[8]xf32>.
3235func.func @load_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<[8]xf32> {
3236  %0 = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<[8]xf32>
3237  return %0 : vector<[8]xf32>
3238}
3239
3240// CHECK-LABEL: func @load_scalable
3241// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
3242// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]]  : i64
3243// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}}  : i64
3244// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3245// CHECK: llvm.load %[[GEP]] {alignment = 4 : i64} : !llvm.ptr -> vector<[8]xf32>
3246
3247// -----
3248
// Test input: vector.load carrying `nontemporal = true`. The expected llvm.load
// keeps alignment 4 and additionally carries the `nontemporal` attribute.
3249func.func @load_nontemporal(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<8xf32> {
3250  %0 = vector.load %memref[%i, %j] {nontemporal = true} : memref<200x100xf32>, vector<8xf32>
3251  return %0 : vector<8xf32>
3252}
3253
3254// CHECK-LABEL: func @load_nontemporal
3255// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
3256// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]]  : i64
3257// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}}  : i64
3258// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3259// CHECK: llvm.load %[[GEP]] {alignment = 4 : i64, nontemporal} : !llvm.ptr -> vector<8xf32>
3260
3261// -----
3262
// Test input: scalable-vector variant of @load_nontemporal. The expected
// llvm.load produces vector<[8]xf32> with alignment 4 and the `nontemporal`
// attribute.
3263func.func @load_nontemporal_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<[8]xf32> {
3264  %0 = vector.load %memref[%i, %j] {nontemporal = true} : memref<200x100xf32>, vector<[8]xf32>
3265  return %0 : vector<[8]xf32>
3266}
3267
3268// CHECK-LABEL: func @load_nontemporal_scalable
3269// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
3270// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]]  : i64
3271// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}}  : i64
3272// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3273// CHECK: llvm.load %[[GEP]] {alignment = 4 : i64, nontemporal} : !llvm.ptr -> vector<[8]xf32>
3274
3275// -----
3276
// Test input: vector.load of `index` elements. The expected output loads
// vector<8xi64> with alignment 8 and casts the result back to vector<8xindex>.
3277func.func @load_index(%memref : memref<200x100xindex>, %i : index, %j : index) -> vector<8xindex> {
3278  %0 = vector.load %memref[%i, %j] : memref<200x100xindex>, vector<8xindex>
3279  return %0 : vector<8xindex>
3280}
3281// CHECK-LABEL: func @load_index
3282// CHECK: %[[T0:.*]] = llvm.load %{{.*}} {alignment = 8 : i64} : !llvm.ptr -> vector<8xi64>
3283// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<8xi64> to vector<8xindex>
3284// CHECK: return %[[T1]] : vector<8xindex>
3285
3286// -----
3287
// Test input: scalable-vector variant of @load_index. The expected output
// loads vector<[8]xi64> with alignment 8 and casts it back to
// vector<[8]xindex>.
3288func.func @load_index_scalable(%memref : memref<200x100xindex>, %i : index, %j : index) -> vector<[8]xindex> {
3289  %0 = vector.load %memref[%i, %j] : memref<200x100xindex>, vector<[8]xindex>
3290  return %0 : vector<[8]xindex>
3291}
3292// CHECK-LABEL: func @load_index_scalable
3293// CHECK: %[[T0:.*]] = llvm.load %{{.*}} {alignment = 8 : i64} : !llvm.ptr -> vector<[8]xi64>
3294// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<[8]xi64> to vector<[8]xindex>
3295// CHECK: return %[[T1]] : vector<[8]xindex>
3296
3297// -----
3298
// Test input: vector.load of a 0-D vector<f32>. The expected output performs
// the load as vector<1xf32> (alignment 4) and then casts the result to
// vector<f32>.
3299func.func @load_0d(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<f32> {
3300  %0 = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<f32>
3301  return %0 : vector<f32>
3302}
3303
3304// CHECK-LABEL: func @load_0d
3305// CHECK: %[[J:.*]] = builtin.unrealized_conversion_cast %{{.*}} : index to i64
3306// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %{{.*}} : index to i64
3307// CHECK: %[[CAST_MEMREF:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<200x100xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
3308// CHECK: %[[REF:.*]] = llvm.extractvalue %[[CAST_MEMREF]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
3309// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
3310// CHECK: %[[MUL:.*]] = llvm.mul %[[I]], %[[C100]] : i64
3311// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %[[J]] : i64
3312// CHECK: %[[ADDR:.*]] = llvm.getelementptr %[[REF]][%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3313// CHECK: %[[LOAD:.*]] = llvm.load %[[ADDR]] {alignment = 4 : i64} : !llvm.ptr -> vector<1xf32>
3314// CHECK: %[[RES:.*]] = builtin.unrealized_conversion_cast %[[LOAD]] : vector<1xf32> to vector<f32>
3315// CHECK: return %[[RES]] : vector<f32>

3316// -----
3317
3318//===----------------------------------------------------------------------===//
3319// vector.store
3320//===----------------------------------------------------------------------===//
3321
// Test input: vector.store of a constant vector<4xf32> into a 200x100 f32
// memref at [%i, %j]. The expected output computes a linear offset (multiply
// by 100, then add), forms a getelementptr, and issues an llvm.store with
// alignment 4.
3322func.func @store(%memref : memref<200x100xf32>, %i : index, %j : index) {
3323  %val = arith.constant dense<11.0> : vector<4xf32>
3324  vector.store %val, %memref[%i, %j] : memref<200x100xf32>, vector<4xf32>
3325  return
3326}
3327
3328// CHECK-LABEL: func @store
3329// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
3330// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]]  : i64
3331// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}}  : i64
3332// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3333// CHECK: llvm.store %{{.*}}, %[[GEP]] {alignment = 4 : i64} :  vector<4xf32>, !llvm.ptr
3334
3335// -----
3336
// Test input: scalable-vector variant of @store (vector<[4]xf32>). The expected
// output has the same address computation and an llvm.store with alignment 4.
3337func.func @store_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) {
3338  %val = arith.constant dense<11.0> : vector<[4]xf32>
3339  vector.store %val, %memref[%i, %j] : memref<200x100xf32>, vector<[4]xf32>
3340  return
3341}
3342
3343// CHECK-LABEL: func @store_scalable
3344// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
3345// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]]  : i64
3346// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}}  : i64
3347// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3348// CHECK: llvm.store %{{.*}}, %[[GEP]] {alignment = 4 : i64} :  vector<[4]xf32>, !llvm.ptr
3349
3350// -----
3351
// Test input: vector.store carrying `nontemporal = true`. The expected
// llvm.store keeps alignment 4 and additionally carries the `nontemporal`
// attribute.
3352func.func @store_nontemporal(%memref : memref<200x100xf32>, %i : index, %j : index) {
3353  %val = arith.constant dense<11.0> : vector<4xf32>
3354  vector.store %val, %memref[%i, %j] {nontemporal = true} : memref<200x100xf32>, vector<4xf32>
3355  return
3356}
3357
3358// CHECK-LABEL: func @store_nontemporal
3359// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
3360// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]]  : i64
3361// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}}  : i64
3362// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3363// CHECK: llvm.store %{{.*}}, %[[GEP]] {alignment = 4 : i64, nontemporal} :  vector<4xf32>, !llvm.ptr
3364
3365// -----
3366
// Test input: scalable-vector variant of @store_nontemporal. The expected
// llvm.store writes vector<[4]xf32> with alignment 4 and the `nontemporal`
// attribute.
3367func.func @store_nontemporal_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) {
3368  %val = arith.constant dense<11.0> : vector<[4]xf32>
3369  vector.store %val, %memref[%i, %j] {nontemporal = true} : memref<200x100xf32>, vector<[4]xf32>
3370  return
3371}
3372
3373// CHECK-LABEL: func @store_nontemporal_scalable
3374// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
3375// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]]  : i64
3376// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}}  : i64
3377// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3378// CHECK: llvm.store %{{.*}}, %[[GEP]] {alignment = 4 : i64, nontemporal} :  vector<[4]xf32>, !llvm.ptr
3379
3380// -----
3381
// Test input: vector.store of `index` elements. The expected llvm.store writes
// vector<4xi64> with alignment 8.
3382func.func @store_index(%memref : memref<200x100xindex>, %i : index, %j : index) {
3383  %val = arith.constant dense<11> : vector<4xindex>
3384  vector.store %val, %memref[%i, %j] : memref<200x100xindex>, vector<4xindex>
3385  return
3386}
3387// CHECK-LABEL: func @store_index
3388// CHECK: llvm.store %{{.*}}, %{{.*}} {alignment = 8 : i64} : vector<4xi64>, !llvm.ptr
3389
3390// -----
3391
// Test input: scalable-vector variant of @store_index. The expected llvm.store
// writes vector<[4]xi64> with alignment 8.
3392func.func @store_index_scalable(%memref : memref<200x100xindex>, %i : index, %j : index) {
3393  %val = arith.constant dense<11> : vector<[4]xindex>
3394  vector.store %val, %memref[%i, %j] : memref<200x100xindex>, vector<[4]xindex>
3395  return
3396}
3397// CHECK-LABEL: func @store_index_scalable
3398// CHECK: llvm.store %{{.*}}, %{{.*}} {alignment = 8 : i64} : vector<[4]xi64>, !llvm.ptr
3399
3400// -----
3401
// Test input: vector.store of a 0-D vector<f32> constant. The expected output
// casts the value to vector<1xf32> and stores it with alignment 4 at the
// computed element address.
3402func.func @store_0d(%memref : memref<200x100xf32>, %i : index, %j : index) {
3403  %val = arith.constant dense<11.0> : vector<f32>
3404  vector.store %val, %memref[%i, %j] : memref<200x100xf32>, vector<f32>
3405  return
3406}
3407
3408// CHECK-LABEL: func @store_0d
3409// CHECK: %[[J:.*]] = builtin.unrealized_conversion_cast %{{.*}} : index to i64
3410// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %{{.*}} : index to i64
3411// CHECK: %[[CAST_MEMREF:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<200x100xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
3412// CHECK: %[[CST:.*]] = arith.constant dense<1.100000e+01> : vector<f32>
3413// CHECK: %[[VAL:.*]] = builtin.unrealized_conversion_cast %[[CST]] : vector<f32> to vector<1xf32>
3414// CHECK: %[[REF:.*]] = llvm.extractvalue %[[CAST_MEMREF]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
3415// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
3416// CHECK: %[[MUL:.*]] = llvm.mul %[[I]], %[[C100]] : i64
3417// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %[[J]] : i64
3418// CHECK: %[[ADDR:.*]] = llvm.getelementptr %[[REF]][%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3419// CHECK: llvm.store %[[VAL]], %[[ADDR]] {alignment = 4 : i64} : vector<1xf32>, !llvm.ptr
3420// CHECK: return
3421
3422// -----
3423
3424//===----------------------------------------------------------------------===//
3425// vector.maskedload
3426//===----------------------------------------------------------------------===//
3427
// Test input: vector.maskedload of vector<16xf32> from a dynamic 1-D f32
// memref at index 0, with mask %arg1 and pass-through %arg2. The expected
// output is a getelementptr followed by llvm.intr.masked.load with
// alignment 4.
3428func.func @masked_load(%arg0: memref<?xf32>, %arg1: vector<16xi1>, %arg2: vector<16xf32>) -> vector<16xf32> {
3429  %c0 = arith.constant 0: index
3430  %0 = vector.maskedload %arg0[%c0], %arg1, %arg2 : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
3431  return %0 : vector<16xf32>
3432}
3433
3434// CHECK-LABEL: func @masked_load
3435// CHECK: %[[CO:.*]] = arith.constant 0 : index
3436// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[CO]] : index to i64
3437// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3438// CHECK: %[[L:.*]] = llvm.intr.masked.load %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.ptr, vector<16xi1>, vector<16xf32>) -> vector<16xf32>
3439// CHECK: return %[[L]] : vector<16xf32>
3440
3441// -----
3442
// Test input: scalable-vector variant of @masked_load. The expected
// llvm.intr.masked.load uses alignment 4 and vector<[16]x...> operand and
// result types.
3443func.func @masked_load_scalable(%arg0: memref<?xf32>, %arg1: vector<[16]xi1>, %arg2: vector<[16]xf32>) -> vector<[16]xf32> {
3444  %c0 = arith.constant 0: index
3445  %0 = vector.maskedload %arg0[%c0], %arg1, %arg2 : memref<?xf32>, vector<[16]xi1>, vector<[16]xf32> into vector<[16]xf32>
3446  return %0 : vector<[16]xf32>
3447}
3448
3449// CHECK-LABEL: func @masked_load_scalable
3450// CHECK: %[[CO:.*]] = arith.constant 0 : index
3451// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[CO]] : index to i64
3452// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3453// CHECK: %[[L:.*]] = llvm.intr.masked.load %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.ptr, vector<[16]xi1>, vector<[16]xf32>) -> vector<[16]xf32>
3454// CHECK: return %[[L]] : vector<[16]xf32>
3455
3456// -----
3457
// Test input: vector.maskedload with `index` elements. The expected
// llvm.intr.masked.load operates on vector<16xi64> with alignment 8.
3458func.func @masked_load_index(%arg0: memref<?xindex>, %arg1: vector<16xi1>, %arg2: vector<16xindex>) -> vector<16xindex> {
3459  %c0 = arith.constant 0: index
3460  %0 = vector.maskedload %arg0[%c0], %arg1, %arg2 : memref<?xindex>, vector<16xi1>, vector<16xindex> into vector<16xindex>
3461  return %0 : vector<16xindex>
3462}
3463// CHECK-LABEL: func @masked_load_index
3464// CHECK: %{{.*}} = llvm.intr.masked.load %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} : (!llvm.ptr, vector<16xi1>, vector<16xi64>) -> vector<16xi64>
3465
3466// -----
3467
// Test input: scalable-vector variant of @masked_load_index. The expected
// llvm.intr.masked.load operates on vector<[16]xi64> with alignment 8.
3468func.func @masked_load_index_scalable(%arg0: memref<?xindex>, %arg1: vector<[16]xi1>, %arg2: vector<[16]xindex>) -> vector<[16]xindex> {
3469  %c0 = arith.constant 0: index
3470  %0 = vector.maskedload %arg0[%c0], %arg1, %arg2 : memref<?xindex>, vector<[16]xi1>, vector<[16]xindex> into vector<[16]xindex>
3471  return %0 : vector<[16]xindex>
3472}
3473// CHECK-LABEL: func @masked_load_index_scalable
3474// CHECK: %{{.*}} = llvm.intr.masked.load %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} : (!llvm.ptr, vector<[16]xi1>, vector<[16]xi64>) -> vector<[16]xi64>
3475
3476// -----
3477
3478//===----------------------------------------------------------------------===//
3479// vector.maskedstore
3480//===----------------------------------------------------------------------===//
3481
// Test input: vector.maskedstore of vector<16xf32> (%arg2) under mask %arg1
// into a dynamic 1-D f32 memref at index 0. The expected output is a
// getelementptr followed by llvm.intr.masked.store with alignment 4.
3482func.func @masked_store(%arg0: memref<?xf32>, %arg1: vector<16xi1>, %arg2: vector<16xf32>) {
3483  %c0 = arith.constant 0: index
3484  vector.maskedstore %arg0[%c0], %arg1, %arg2 : memref<?xf32>, vector<16xi1>, vector<16xf32>
3485  return
3486}
3487
3488// CHECK-LABEL: func @masked_store
3489// CHECK: %[[CO:.*]] = arith.constant 0 : index
3490// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[CO]] : index to i64
3491// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3492// CHECK: llvm.intr.masked.store %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<16xf32>, vector<16xi1> into !llvm.ptr
3493
3494// -----
3495
// Test input: scalable-vector variant of @masked_store. The expected
// llvm.intr.masked.store uses alignment 4 with vector<[16]xf32> data and a
// vector<[16]xi1> mask.
3496func.func @masked_store_scalable(%arg0: memref<?xf32>, %arg1: vector<[16]xi1>, %arg2: vector<[16]xf32>) {
3497  %c0 = arith.constant 0: index
3498  vector.maskedstore %arg0[%c0], %arg1, %arg2 : memref<?xf32>, vector<[16]xi1>, vector<[16]xf32>
3499  return
3500}
3501
3502// CHECK-LABEL: func @masked_store_scalable
3503// CHECK: %[[CO:.*]] = arith.constant 0 : index
3504// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[CO]] : index to i64
3505// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3506// CHECK: llvm.intr.masked.store %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<[16]xf32>, vector<[16]xi1> into !llvm.ptr
3507
3508// -----
3509
// Test input: vector.maskedstore with `index` elements. The expected
// llvm.intr.masked.store writes vector<16xi64> with alignment 8.
3510func.func @masked_store_index(%arg0: memref<?xindex>, %arg1: vector<16xi1>, %arg2: vector<16xindex>) {
3511  %c0 = arith.constant 0: index
3512  vector.maskedstore %arg0[%c0], %arg1, %arg2 : memref<?xindex>, vector<16xi1>, vector<16xindex>
3513  return
3514}
3515// CHECK-LABEL: func @masked_store_index
3516// CHECK: llvm.intr.masked.store %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} : vector<16xi64>, vector<16xi1> into !llvm.ptr
3517
3518// -----
3519
// Test input: scalable-vector variant of @masked_store_index. The expected
// llvm.intr.masked.store writes vector<[16]xi64> with alignment 8.
3520func.func @masked_store_index_scalable(%arg0: memref<?xindex>, %arg1: vector<[16]xi1>, %arg2: vector<[16]xindex>) {
3521  %c0 = arith.constant 0: index
3522  vector.maskedstore %arg0[%c0], %arg1, %arg2 : memref<?xindex>, vector<[16]xi1>, vector<[16]xindex>
3523  return
3524}
3525// CHECK-LABEL: func @masked_store_index_scalable
3526// CHECK: llvm.intr.masked.store %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} : vector<[16]xi64>, vector<[16]xi1> into !llvm.ptr
3527
3528// -----
3529
3530//===----------------------------------------------------------------------===//
3531// vector.gather
3532//===----------------------------------------------------------------------===//
3533
// Test input: 1-D vector.gather from a dynamic f32 memref with i32 index
// vector %arg1, mask %arg2 and pass-through %arg3. The expected output is a
// getelementptr yielding a vector of pointers, followed by
// llvm.intr.masked.gather with alignment 4.
3534func.func @gather(%arg0: memref<?xf32>, %arg1: vector<3xi32>, %arg2: vector<3xi1>, %arg3: vector<3xf32>) -> vector<3xf32> {
3535  %0 = arith.constant 0: index
3536  %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32>, vector<3xi32>, vector<3xi1>, vector<3xf32> into vector<3xf32>
3537  return %1 : vector<3xf32>
3538}
3539
3540// CHECK-LABEL: func @gather
3541// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<3xi32>) -> !llvm.vec<3 x ptr>, f32
3542// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xf32>) -> vector<3xf32>
3543// CHECK: return %[[G]] : vector<3xf32>
3544
3545// -----
3546
// Test input: scalable-vector variant of @gather. The expected getelementptr
// yields !llvm.vec<? x 3 x ptr>, which feeds llvm.intr.masked.gather with
// alignment 4.
3547func.func @gather_scalable(%arg0: memref<?xf32>, %arg1: vector<[3]xi32>, %arg2: vector<[3]xi1>, %arg3: vector<[3]xf32>) -> vector<[3]xf32> {
3548  %0 = arith.constant 0: index
3549  %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32>, vector<[3]xi32>, vector<[3]xi1>, vector<[3]xf32> into vector<[3]xf32>
3550  return %1 : vector<[3]xf32>
3551}
3552
3553// CHECK-LABEL: func @gather_scalable
3554// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<[3]xi32>) -> !llvm.vec<? x 3 x ptr>, f32
3555// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<? x 3 x ptr>, vector<[3]xi1>, vector<[3]xf32>) -> vector<[3]xf32>
3556// CHECK: return %[[G]] : vector<[3]xf32>
3557
3558// -----
3559
// Test input: vector.gather from a memref in memory space 1. The expected
// output uses !llvm.ptr<1> for the base pointer and !llvm.vec<3 x ptr<1>> for
// the gathered pointer vector, i.e. the address space is carried through.
3560func.func @gather_global_memory(%arg0: memref<?xf32, 1>, %arg1: vector<3xi32>, %arg2: vector<3xi1>, %arg3: vector<3xf32>) -> vector<3xf32> {
3561  %0 = arith.constant 0: index
3562  %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32, 1>, vector<3xi32>, vector<3xi1>, vector<3xf32> into vector<3xf32>
3563  return %1 : vector<3xf32>
3564}
3565
3566// CHECK-LABEL: func @gather_global_memory
3567// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr<1>, vector<3xi32>) -> !llvm.vec<3 x ptr<1>>, f32
3568// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr<1>>, vector<3xi1>, vector<3xf32>) -> vector<3xf32>
3569// CHECK: return %[[G]] : vector<3xf32>
3570
3571// -----
3572
// Test input: scalable-vector variant of @gather_global_memory. The expected
// pointer vector type is !llvm.vec<? x 3 x ptr<1>>, feeding
// llvm.intr.masked.gather with alignment 4.
3573func.func @gather_global_memory_scalable(%arg0: memref<?xf32, 1>, %arg1: vector<[3]xi32>, %arg2: vector<[3]xi1>, %arg3: vector<[3]xf32>) -> vector<[3]xf32> {
3574  %0 = arith.constant 0: index
3575  %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32, 1>, vector<[3]xi32>, vector<[3]xi1>, vector<[3]xf32> into vector<[3]xf32>
3576  return %1 : vector<[3]xf32>
3577}
3578
3579// CHECK-LABEL: func @gather_global_memory_scalable
3580// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr<1>, vector<[3]xi32>) -> !llvm.vec<? x 3 x ptr<1>>, f32
3581// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<? x 3 x ptr<1>>, vector<[3]xi1>, vector<[3]xf32>) -> vector<[3]xf32>
3582// CHECK: return %[[G]] : vector<[3]xf32>
3583
3584// -----
3585
3586
// Test input: vector.gather where both the index vector and the gathered
// elements are of `index` type. The expected output uses vector<3xi64>
// indices, gathers vector<3xi64> with alignment 8, and casts the result back
// to vector<3xindex>.
3587func.func @gather_index(%arg0: memref<?xindex>, %arg1: vector<3xindex>, %arg2: vector<3xi1>, %arg3: vector<3xindex>) -> vector<3xindex> {
3588  %0 = arith.constant 0: index
3589  %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xindex>, vector<3xindex>, vector<3xi1>, vector<3xindex> into vector<3xindex>
3590  return %1 : vector<3xindex>
3591}
3592
3593// CHECK-LABEL: func @gather_index
3594// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<3xi64>) -> !llvm.vec<3 x ptr>, i64
3595// CHECK: %[[G:.*]] = llvm.intr.masked.gather %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xi64>) -> vector<3xi64>
3596// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %[[G]] : vector<3xi64> to vector<3xindex>
3597
3598// -----
3599
// Test input: scalable-vector variant of @gather_index. The expected output
// gathers vector<[3]xi64> with alignment 8 and casts the result back to
// vector<[3]xindex>.
3600func.func @gather_index_scalable(%arg0: memref<?xindex>, %arg1: vector<[3]xindex>, %arg2: vector<[3]xi1>, %arg3: vector<[3]xindex>) -> vector<[3]xindex> {
3601  %0 = arith.constant 0: index
3602  %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xindex>, vector<[3]xindex>, vector<[3]xi1>, vector<[3]xindex> into vector<[3]xindex>
3603  return %1 : vector<[3]xindex>
3604}
3605
3606// CHECK-LABEL: func @gather_index_scalable
3607// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<[3]xi64>) -> !llvm.vec<? x 3 x ptr>, i64
3608// CHECK: %[[G:.*]] = llvm.intr.masked.gather %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} : (!llvm.vec<? x 3 x ptr>, vector<[3]xi1>, vector<[3]xi64>) -> vector<[3]xi64>
3609// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %[[G]] : vector<[3]xi64> to vector<[3]xindex>
3610
3611// -----
3612
// Test input: vector.gather producing a 2x3 result from a 1-D memref. The
// expected output handles each of the two rows separately: it extracts the
// row's index, mask and pass-through vectors, emits a getelementptr plus
// llvm.intr.masked.gather (alignment 4), and inserts the row into the result.
3613func.func @gather_2d_from_1d(%arg0: memref<?xf32>, %arg1: vector<2x3xi32>, %arg2: vector<2x3xi1>, %arg3: vector<2x3xf32>) -> vector<2x3xf32> {
3614  %0 = arith.constant 0: index
3615  %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32>, vector<2x3xi32>, vector<2x3xi1>, vector<2x3xf32> into vector<2x3xf32>
3616  return %1 : vector<2x3xf32>
3617}
3618
3619// CHECK-LABEL: func @gather_2d_from_1d
3620// CHECK: %[[B:.*]] = llvm.getelementptr %{{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32
3621// CHECK: %[[I0:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<3xi32>>
3622// CHECK: %[[M0:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<3xi1>>
3623// CHECK: %[[S0:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<3xf32>>
3624// CHECK: %[[P0:.*]] = llvm.getelementptr %[[B]][%[[I0]]] : (!llvm.ptr, vector<3xi32>) -> !llvm.vec<3 x ptr>, f32
3625// CHECK: %[[G0:.*]] = llvm.intr.masked.gather %[[P0]], %[[M0]], %[[S0]] {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xf32>) -> vector<3xf32>
3626// CHECK: %{{.*}} = llvm.insertvalue %[[G0]], %{{.*}}[0] : !llvm.array<2 x vector<3xf32>>
3627// CHECK: %[[I1:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<3xi32>>
3628// CHECK: %[[M1:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<3xi1>>
3629// CHECK: %[[S1:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<3xf32>>
3630// CHECK: %[[P1:.*]] = llvm.getelementptr %[[B]][%[[I1]]] : (!llvm.ptr, vector<3xi32>) -> !llvm.vec<3 x ptr>, f32
3631// CHECK: %[[G1:.*]] = llvm.intr.masked.gather %[[P1]], %[[M1]], %[[S1]] {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xf32>) -> vector<3xf32>
3632// CHECK: %{{.*}} = llvm.insertvalue %[[G1]], %{{.*}}[1] : !llvm.array<2 x vector<3xf32>>
3633
3634// -----
3635
// Test input: variant of @gather_2d_from_1d whose trailing dimension is
// scalable (2x[3]). The expected output again handles rows 0 and 1 separately,
// each with a getelementptr yielding !llvm.vec<? x 3 x ptr> and an
// llvm.intr.masked.gather with alignment 4.
3636func.func @gather_2d_from_1d_scalable(%arg0: memref<?xf32>, %arg1: vector<2x[3]xi32>, %arg2: vector<2x[3]xi1>, %arg3: vector<2x[3]xf32>) -> vector<2x[3]xf32> {
3637  %0 = arith.constant 0: index
3638  %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32>, vector<2x[3]xi32>, vector<2x[3]xi1>, vector<2x[3]xf32> into vector<2x[3]xf32>
3639  return %1 : vector<2x[3]xf32>
3640}
3641
3642// CHECK-LABEL: func @gather_2d_from_1d_scalable
3643// CHECK: %[[B:.*]] = llvm.getelementptr %{{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32
3644// CHECK: %[[I0:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<[3]xi32>>
3645// CHECK: %[[M0:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<[3]xi1>>
3646// CHECK: %[[S0:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<[3]xf32>>
3647// CHECK: %[[P0:.*]] = llvm.getelementptr %[[B]][%[[I0]]] : (!llvm.ptr, vector<[3]xi32>) -> !llvm.vec<? x 3 x ptr>, f32
3648// CHECK: %[[G0:.*]] = llvm.intr.masked.gather %[[P0]], %[[M0]], %[[S0]] {alignment = 4 : i32} : (!llvm.vec<? x 3 x ptr>, vector<[3]xi1>, vector<[3]xf32>) -> vector<[3]xf32>
3649// CHECK: %{{.*}} = llvm.insertvalue %[[G0]], %{{.*}}[0] : !llvm.array<2 x vector<[3]xf32>>
3650// CHECK: %[[I1:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<[3]xi32>>
3651// CHECK: %[[M1:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<[3]xi1>>
3652// CHECK: %[[S1:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<[3]xf32>>
3653// CHECK: %[[P1:.*]] = llvm.getelementptr %[[B]][%[[I1]]] : (!llvm.ptr, vector<[3]xi32>) -> !llvm.vec<? x 3 x ptr>, f32
3654// CHECK: %[[G1:.*]] = llvm.intr.masked.gather %[[P1]], %[[M1]], %[[S1]] {alignment = 4 : i32} : (!llvm.vec<? x 3 x ptr>, vector<[3]xi1>, vector<[3]xf32>) -> vector<[3]xf32>
3655// CHECK: %{{.*}} = llvm.insertvalue %[[G1]], %{{.*}}[1] : !llvm.array<2 x vector<[3]xf32>>
3656
3657// -----
3658
// 2-D gather whose mask is produced by vector.constant_mask [1, 2] rather than
// being passed in as a function argument.
func.func @gather_with_mask(%arg0: memref<?xf32>, %arg1: vector<2x3xi32>, %arg2: vector<2x3xf32>) -> vector<2x3xf32> {
  %c0 = arith.constant 0 : index
  %mask = vector.constant_mask [1, 2] : vector<2x3xi1>
  %gathered = vector.gather %arg0[%c0][%arg1], %mask, %arg2 : memref<?xf32>, vector<2x3xi32>, vector<2x3xi1>, vector<2x3xf32> into vector<2x3xf32>
  return %gathered : vector<2x3xf32>
}
3665
3666// CHECK-LABEL: func @gather_with_mask
3667// CHECK: %[[G0:.*]] = llvm.intr.masked.gather %{{.*}}, %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xf32>) -> vector<3xf32>
3668// CHECK: %[[G1:.*]] = llvm.intr.masked.gather %{{.*}}, %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xf32>) -> vector<3xf32>
3669
3670// -----
3671
// Scalable counterpart of the constant-mask gather test.
func.func @gather_with_mask_scalable(%arg0: memref<?xf32>, %arg1: vector<2x[3]xi32>, %arg2: vector<2x[3]xf32>) -> vector<2x[3]xf32> {
  %c0 = arith.constant 0 : index
  // A scalable dimension of vector.constant_mask must be either 'none set' or
  // 'all set', so this test uses [1, 3] where the fixed-width example above
  // uses [1, 2].
  %mask = vector.constant_mask [1, 3] : vector<2x[3]xi1>
  %gathered = vector.gather %arg0[%c0][%arg1], %mask, %arg2 : memref<?xf32>, vector<2x[3]xi32>, vector<2x[3]xi1>, vector<2x[3]xf32> into vector<2x[3]xf32>
  return %gathered : vector<2x[3]xf32>
}
3681
3682// CHECK-LABEL: func @gather_with_mask_scalable
3683// CHECK: %[[G0:.*]] = llvm.intr.masked.gather %{{.*}}, %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<? x 3 x ptr>, vector<[3]xi1>, vector<[3]xf32>) -> vector<[3]xf32>
3684// CHECK: %[[G1:.*]] = llvm.intr.masked.gather %{{.*}}, %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<? x 3 x ptr>, vector<[3]xi1>, vector<[3]xf32>) -> vector<[3]xf32>
3685
3686
3687// -----
3688
// Gather guarded by an all-false constant mask. The expected output contains no
// masked gather and simply returns the pass-through operand.
func.func @gather_with_zero_mask(%arg0: memref<?xf32>, %arg1: vector<2x3xi32>, %arg2: vector<2x3xf32>) -> vector<2x3xf32> {
  %c0 = arith.constant 0 : index
  %none = vector.constant_mask [0, 0] : vector<2x3xi1>
  %gathered = vector.gather %arg0[%c0][%arg1], %none, %arg2 : memref<?xf32>, vector<2x3xi32>, vector<2x3xi1>, vector<2x3xf32> into vector<2x3xf32>
  return %gathered : vector<2x3xf32>
}
3695
3696// CHECK-LABEL: func @gather_with_zero_mask
3697// CHECK-SAME:    (%{{.*}}: memref<?xf32>, %{{.*}}: vector<2x3xi32>, %[[S:.*]]: vector<2x3xf32>)
3698// CHECK-NOT:   %{{.*}} = llvm.intr.masked.gather
3699// CHECK:       return %[[S]] : vector<2x3xf32>
3700
3701// -----
3702
// Scalable variant of the all-false-mask gather. The pass-through operand is
// expected to be returned directly, with no masked gather emitted.
func.func @gather_with_zero_mask_scalable(%arg0: memref<?xf32>, %arg1: vector<2x[3]xi32>, %arg2: vector<2x[3]xf32>) -> vector<2x[3]xf32> {
  %c0 = arith.constant 0 : index
  %none = vector.constant_mask [0, 0] : vector<2x[3]xi1>
  %gathered = vector.gather %arg0[%c0][%arg1], %none, %arg2 : memref<?xf32>, vector<2x[3]xi32>, vector<2x[3]xi1>, vector<2x[3]xf32> into vector<2x[3]xf32>
  return %gathered : vector<2x[3]xf32>
}
3709
3710// CHECK-LABEL: func @gather_with_zero_mask_scalable
3711// CHECK-SAME:    (%{{.*}}: memref<?xf32>, %{{.*}}: vector<2x[3]xi32>, %[[S:.*]]: vector<2x[3]xf32>)
3712// CHECK-NOT:   %{{.*}} = llvm.intr.masked.gather
3713// CHECK:       return %[[S]] : vector<2x[3]xf32>
3714
3715// -----
3716
// 1-D gather out of a 2-D memref, using base indices [3, 3].
func.func @gather_1d_from_2d(%arg0: memref<4x4xf32>, %arg1: vector<4xi32>, %arg2: vector<4xi1>, %arg3: vector<4xf32>) -> vector<4xf32> {
  %c3 = arith.constant 3 : index
  %gathered = vector.gather %arg0[%c3, %c3][%arg1], %arg2, %arg3 : memref<4x4xf32>, vector<4xi32>, vector<4xi1>, vector<4xf32> into vector<4xf32>
  return %gathered : vector<4xf32>
}
3722
3723// CHECK-LABEL: func @gather_1d_from_2d
3724// CHECK: %[[B:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3725// CHECK: %[[P:.*]] = llvm.getelementptr %[[B]][%{{.*}}] : (!llvm.ptr, vector<4xi32>) -> !llvm.vec<4 x ptr>, f32
3726// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<4 x ptr>, vector<4xi1>, vector<4xf32>) -> vector<4xf32>
3727// CHECK: return %[[G]] : vector<4xf32>
3728
3729// -----
3730
// Scalable 1-D gather out of a 2-D memref with a dynamic inner dimension,
// using base indices [3, 3].
func.func @gather_1d_from_2d_scalable(%arg0: memref<4x?xf32>, %arg1: vector<[4]xi32>, %arg2: vector<[4]xi1>, %arg3: vector<[4]xf32>) -> vector<[4]xf32> {
  %c3 = arith.constant 3 : index
  %gathered = vector.gather %arg0[%c3, %c3][%arg1], %arg2, %arg3 : memref<4x?xf32>, vector<[4]xi32>, vector<[4]xi1>, vector<[4]xf32> into vector<[4]xf32>
  return %gathered : vector<[4]xf32>
}
3736
3737// CHECK-LABEL: func @gather_1d_from_2d_scalable
3738// CHECK: %[[B:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3739// CHECK: %[[P:.*]] = llvm.getelementptr %[[B]][%{{.*}}] : (!llvm.ptr, vector<[4]xi32>) -> !llvm.vec<? x 4 x ptr>, f32
3740// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<? x 4 x ptr>, vector<[4]xi1>, vector<[4]xf32>) -> vector<[4]xf32>
3741// CHECK: return %[[G]] : vector<[4]xf32>
3742
3743// -----
3744
3745//===----------------------------------------------------------------------===//
3746// vector.scatter
3747//===----------------------------------------------------------------------===//
3748
// Masked 1-D scatter of f32 values into a dynamically sized memref.
func.func @scatter(%arg0: memref<?xf32>, %arg1: vector<3xi32>, %arg2: vector<3xi1>, %arg3: vector<3xf32>) {
  %c0 = arith.constant 0 : index
  vector.scatter %arg0[%c0][%arg1], %arg2, %arg3 : memref<?xf32>, vector<3xi32>, vector<3xi1>, vector<3xf32>
  return
}
3754
3755// CHECK-LABEL: func @scatter
3756// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<3xi32>) -> !llvm.vec<3 x ptr>, f32
3757// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<3xf32>, vector<3xi1> into !llvm.vec<3 x ptr>
3758
3759// -----
3760
// Masked 1-D scatter of f32 values using scalable vector<[3]x...> operands.
func.func @scatter_scalable(%arg0: memref<?xf32>, %arg1: vector<[3]xi32>, %arg2: vector<[3]xi1>, %arg3: vector<[3]xf32>) {
  %c0 = arith.constant 0 : index
  vector.scatter %arg0[%c0][%arg1], %arg2, %arg3 : memref<?xf32>, vector<[3]xi32>, vector<[3]xi1>, vector<[3]xf32>
  return
}
3766
3767// CHECK-LABEL: func @scatter_scalable
3768// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<[3]xi32>) -> !llvm.vec<? x 3 x ptr>, f32
3769// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<[3]xf32>, vector<[3]xi1> into !llvm.vec<? x 3 x ptr>
3770
3771// -----
3772
// Scatter where both the offsets and the stored values are of index type; the
// expected lowering uses i64 elements with 8-byte alignment.
func.func @scatter_index(%arg0: memref<?xindex>, %arg1: vector<3xindex>, %arg2: vector<3xi1>, %arg3: vector<3xindex>) {
  %c0 = arith.constant 0 : index
  vector.scatter %arg0[%c0][%arg1], %arg2, %arg3 : memref<?xindex>, vector<3xindex>, vector<3xi1>, vector<3xindex>
  return
}
3778
3779// CHECK-LABEL: func @scatter_index
3780// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<3xi64>) -> !llvm.vec<3 x ptr>, i64
3781// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 8 : i32} : vector<3xi64>, vector<3xi1> into !llvm.vec<3 x ptr>
3782
3783// -----
3784
// Scalable variant of the index-typed scatter; the expected lowering uses i64
// elements with 8-byte alignment.
func.func @scatter_index_scalable(%arg0: memref<?xindex>, %arg1: vector<[3]xindex>, %arg2: vector<[3]xi1>, %arg3: vector<[3]xindex>) {
  %c0 = arith.constant 0 : index
  vector.scatter %arg0[%c0][%arg1], %arg2, %arg3 : memref<?xindex>, vector<[3]xindex>, vector<[3]xi1>, vector<[3]xindex>
  return
}
3790
3791// CHECK-LABEL: func @scatter_index_scalable
3792// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<[3]xi64>) -> !llvm.vec<? x 3 x ptr>, i64
3793// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 8 : i32} : vector<[3]xi64>, vector<[3]xi1> into !llvm.vec<? x 3 x ptr>
3794
3795// -----
3796
// 1-D scatter into a 2-D memref, using base indices [3, 3].
func.func @scatter_1d_into_2d(%arg0: memref<4x4xf32>, %arg1: vector<4xi32>, %arg2: vector<4xi1>, %arg3: vector<4xf32>) {
  %c3 = arith.constant 3 : index
  vector.scatter %arg0[%c3, %c3][%arg1], %arg2, %arg3 : memref<4x4xf32>, vector<4xi32>, vector<4xi1>, vector<4xf32>
  return
}
3802
3803// CHECK-LABEL: func @scatter_1d_into_2d
3804// CHECK: %[[B:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3805// CHECK: %[[P:.*]] = llvm.getelementptr %[[B]][%{{.*}}] : (!llvm.ptr, vector<4xi32>) -> !llvm.vec<4 x ptr>, f32
3806// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<4xf32>, vector<4xi1> into !llvm.vec<4 x ptr>
3807
3808// -----
3809
// Scalable 1-D scatter into a 2-D memref with a dynamic inner dimension, using
// base indices [3, 3].
func.func @scatter_1d_into_2d_scalable(%arg0: memref<4x?xf32>, %arg1: vector<[4]xi32>, %arg2: vector<[4]xi1>, %arg3: vector<[4]xf32>) {
  %c3 = arith.constant 3 : index
  vector.scatter %arg0[%c3, %c3][%arg1], %arg2, %arg3 : memref<4x?xf32>, vector<[4]xi32>, vector<[4]xi1>, vector<[4]xf32>
  return
}
3815
3816// CHECK-LABEL: func @scatter_1d_into_2d_scalable
3817// CHECK: %[[B:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3818// CHECK: %[[P:.*]] = llvm.getelementptr %[[B]][%{{.*}}] : (!llvm.ptr, vector<[4]xi32>) -> !llvm.vec<? x 4 x ptr>, f32
3819// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<[4]xf32>, vector<[4]xi1> into !llvm.vec<? x 4 x ptr>
3820
3821// -----
3822
3823//===----------------------------------------------------------------------===//
3824// vector.expandload
3825//===----------------------------------------------------------------------===//
3826
// Masked expanding load of 11 f32 elements starting at index 0.
func.func @expand_load_op(%arg0: memref<?xf32>, %arg1: vector<11xi1>, %arg2: vector<11xf32>) -> vector<11xf32> {
  %base = arith.constant 0 : index
  %expanded = vector.expandload %arg0[%base], %arg1, %arg2 : memref<?xf32>, vector<11xi1>, vector<11xf32> into vector<11xf32>
  return %expanded : vector<11xf32>
}
3832
3833// CHECK-LABEL: func @expand_load_op
3834// CHECK: %[[CO:.*]] = arith.constant 0 : index
3835// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[CO]] : index to i64
3836// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3837// CHECK: %[[E:.*]] = "llvm.intr.masked.expandload"(%[[P]], %{{.*}}, %{{.*}}) : (!llvm.ptr, vector<11xi1>, vector<11xf32>) -> vector<11xf32>
3838// CHECK: return %[[E]] : vector<11xf32>
3839
3840// -----
3841
// Expanding load with index-typed elements; the expected intrinsic operates on
// vector<11xi64>.
func.func @expand_load_op_index(%arg0: memref<?xindex>, %arg1: vector<11xi1>, %arg2: vector<11xindex>) -> vector<11xindex> {
  %base = arith.constant 0 : index
  %expanded = vector.expandload %arg0[%base], %arg1, %arg2 : memref<?xindex>, vector<11xi1>, vector<11xindex> into vector<11xindex>
  return %expanded : vector<11xindex>
}
3847// CHECK-LABEL: func @expand_load_op_index
3848// CHECK: %{{.*}} = "llvm.intr.masked.expandload"(%{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, vector<11xi1>, vector<11xi64>) -> vector<11xi64>
3849
3850// -----
3851
3852//===----------------------------------------------------------------------===//
3853// vector.compressstore
3854//===----------------------------------------------------------------------===//
3855
// Masked compressing store of 11 f32 elements starting at index 0.
func.func @compress_store_op(%arg0: memref<?xf32>, %arg1: vector<11xi1>, %arg2: vector<11xf32>) {
  %base = arith.constant 0 : index
  vector.compressstore %arg0[%base], %arg1, %arg2 : memref<?xf32>, vector<11xi1>, vector<11xf32>
  return
}
3861
3862// CHECK-LABEL: func @compress_store_op
3863// CHECK: %[[CO:.*]] = arith.constant 0 : index
3864// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[CO]] : index to i64
3865// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
3866// CHECK: "llvm.intr.masked.compressstore"(%{{.*}}, %[[P]], %{{.*}}) : (vector<11xf32>, !llvm.ptr, vector<11xi1>) -> ()
3867
3868// -----
3869
// Compressing store with index-typed elements; the expected intrinsic operates
// on vector<11xi64>.
func.func @compress_store_op_index(%arg0: memref<?xindex>, %arg1: vector<11xi1>, %arg2: vector<11xindex>) {
  %base = arith.constant 0 : index
  vector.compressstore %arg0[%base], %arg1, %arg2 : memref<?xindex>, vector<11xi1>, vector<11xindex>
  return
}
3875// CHECK-LABEL: func @compress_store_op_index
3876// CHECK: "llvm.intr.masked.compressstore"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<11xi64>, !llvm.ptr, vector<11xi1>) -> ()
3877
3878// -----
3879
3880//===----------------------------------------------------------------------===//
3881// vector.splat
3882//===----------------------------------------------------------------------===//
3883
3884// CHECK-LABEL: @splat_0d
3885// CHECK-SAME: %[[ELT:.*]]: f32
func.func @splat_0d(%elt: f32) -> vector<f32> {
  // Broadcast a scalar into a rank-0 vector.
  %splatted = vector.splat %elt : vector<f32>
  return %splatted : vector<f32>
}
3890// CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : vector<1xf32>
3891// CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : i32
3892// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : vector<1xf32>
3893// CHECK-NEXT: %[[VCAST:[0-9]+]] = builtin.unrealized_conversion_cast %[[V]] : vector<1xf32> to vector<f32>
3894// CHECK-NEXT: return %[[VCAST]] : vector<f32>
3895
3896// -----
3897
3898// CHECK-LABEL: @splat
3899// CHECK-SAME: %[[VEC:[0-9a-zA-Z]+]]: vector<4xf32>
3900// CHECK-SAME: %[[ELT:[0-9a-zA-Z]+]]: f32
func.func @splat(%vec: vector<4xf32>, %elt: f32) -> vector<4xf32> {
  // Broadcast the scalar, then use it so that the splat feeds a real consumer.
  %bcast = vector.splat %elt : vector<4xf32>
  %scaled = arith.mulf %vec, %bcast : vector<4xf32>
  return %scaled : vector<4xf32>
}
3906// CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : vector<4xf32>
3907// CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : i32
3908// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : vector<4xf32>
3909// CHECK-NEXT: %[[SPLAT:[0-9]+]] = llvm.shufflevector %[[V]], %[[UNDEF]] [0, 0, 0, 0]
3910// CHECK-NEXT: %[[SCALE:[0-9]+]] = arith.mulf %[[VEC]], %[[SPLAT]] : vector<4xf32>
3911// CHECK-NEXT: return %[[SCALE]] : vector<4xf32>
3912
3913// -----
3914
3915// CHECK-LABEL: @splat_scalable
3916// CHECK-SAME: %[[VEC:[0-9a-zA-Z]+]]: vector<[4]xf32>
3917// CHECK-SAME: %[[ELT:[0-9a-zA-Z]+]]: f32
func.func @splat_scalable(%vec: vector<[4]xf32>, %elt: f32) -> vector<[4]xf32> {
  // Same as the fixed-width splat test, but on a scalable vector<[4]xf32>.
  %bcast = vector.splat %elt : vector<[4]xf32>
  %scaled = arith.mulf %vec, %bcast : vector<[4]xf32>
  return %scaled : vector<[4]xf32>
}
3923// CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : vector<[4]xf32>
3924// CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : i32
3925// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : vector<[4]xf32>
3926// CHECK-NEXT: %[[SPLAT:[0-9]+]] = llvm.shufflevector %[[V]], %[[UNDEF]] [0, 0, 0, 0]
3927// CHECK-NEXT: %[[SCALE:[0-9]+]] = arith.mulf %[[VEC]], %[[SPLAT]] : vector<[4]xf32>
3928// CHECK-NEXT: return %[[SCALE]] : vector<[4]xf32>
3929
3930// -----
3931
3932//===----------------------------------------------------------------------===//
// vector.scalable.insert
3934//===----------------------------------------------------------------------===//
3935
3936// CHECK-LABEL: @scalable_insert
3937// CHECK-SAME: %[[SUB:.*]]: vector<4xf32>, %[[SV:.*]]: vector<[4]xf32>
func.func @scalable_insert(%sub: vector<4xf32>, %dsv: vector<[4]xf32>) -> vector<[4]xf32> {
  // First insertion: fixed-width sub-vector at position 0.
  // CHECK-NEXT: %[[TMP:.*]] = llvm.intr.vector.insert %[[SUB]], %[[SV]][0] : vector<4xf32> into vector<[4]xf32>
  %first = vector.scalable.insert %sub, %dsv[0] : vector<4xf32> into vector<[4]xf32>
  // Second insertion: same sub-vector at position 4 of the previous result.
  // CHECK-NEXT: llvm.intr.vector.insert %[[SUB]], %[[TMP]][4] : vector<4xf32> into vector<[4]xf32>
  %second = vector.scalable.insert %sub, %first[4] : vector<4xf32> into vector<[4]xf32>
  return %second : vector<[4]xf32>
}
3945
3946// -----
3947
3948//===----------------------------------------------------------------------===//
// vector.scalable.extract
3950//===----------------------------------------------------------------------===//
3951
3952// CHECK-LABEL: @scalable_extract
3953// CHECK-SAME: %[[VEC:.*]]: vector<[4]xf32>
func.func @scalable_extract(%vec: vector<[4]xf32>) -> vector<8xf32> {
  // Extract a fixed-width vector<8xf32> starting at position 0.
  // CHECK-NEXT: %{{.*}} = llvm.intr.vector.extract %[[VEC]][0] : vector<8xf32> from vector<[4]xf32>
  %extracted = vector.scalable.extract %vec[0] : vector<8xf32> from vector<[4]xf32>
  return %extracted : vector<8xf32>
}
3959
3960// -----
3961
3962//===----------------------------------------------------------------------===//
3963// vector.interleave
3964//===----------------------------------------------------------------------===//
3965
3966// CHECK-LABEL: @interleave_0d
3967//  CHECK-SAME:     %[[LHS:.*]]: vector<i8>, %[[RHS:.*]]: vector<i8>)
func.func @interleave_0d(%a: vector<i8>, %b: vector<i8>) -> vector<2xi8> {
  // Both rank-0 operands are expected to be cast to vector<1xi8> (in either
  // order) before a single shuffle zips them together.
  // CHECK-DAG: %[[LHS_RANK1:.*]] = builtin.unrealized_conversion_cast %[[LHS]] : vector<i8> to vector<1xi8>
  // CHECK-DAG: %[[RHS_RANK1:.*]] = builtin.unrealized_conversion_cast %[[RHS]] : vector<i8> to vector<1xi8>
  // CHECK: %[[ZIP:.*]] = llvm.shufflevector %[[LHS_RANK1]], %[[RHS_RANK1]] [0, 1] : vector<1xi8>
  // CHECK: return %[[ZIP]]
  %zipped = vector.interleave %a, %b : vector<i8> -> vector<2xi8>
  return %zipped : vector<2xi8>
}
3976
3977// -----
3978
3979// CHECK-LABEL: @interleave_1d
3980//  CHECK-SAME:     %[[LHS:.*]]: vector<8xf32>, %[[RHS:.*]]: vector<8xf32>)
func.func @interleave_1d(%a: vector<8xf32>, %b: vector<8xf32>) -> vector<16xf32> {
  // Fixed-width 1-D interleave: expected to become a single shuffle that
  // alternates lanes from the two operands.
  // CHECK: %[[ZIP:.*]] = llvm.shufflevector %[[LHS]], %[[RHS]] [0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15] : vector<8xf32>
  // CHECK: return %[[ZIP]]
  %zipped = vector.interleave %a, %b : vector<8xf32> -> vector<16xf32>
  return %zipped : vector<16xf32>
}
3987
3988// -----
3989
3990// CHECK-LABEL: @interleave_1d_scalable
3991//  CHECK-SAME:     %[[LHS:.*]]: vector<[4]xi32>, %[[RHS:.*]]: vector<[4]xi32>)
func.func @interleave_1d_scalable(%a: vector<[4]xi32>, %b: vector<[4]xi32>) -> vector<[8]xi32> {
  // Scalable 1-D interleave: expected to use the interleave2 intrinsic instead
  // of a shuffle.
  // CHECK: %[[ZIP:.*]] = "llvm.intr.vector.interleave2"(%[[LHS]], %[[RHS]]) : (vector<[4]xi32>, vector<[4]xi32>) -> vector<[8]xi32>
  // CHECK: return %[[ZIP]]
  %zipped = vector.interleave %a, %b : vector<[4]xi32> -> vector<[8]xi32>
  return %zipped : vector<[8]xi32>
}
3998
3999// -----
4000
4001// CHECK-LABEL: @interleave_2d
4002//  CHECK-SAME:     %[[LHS:.*]]: vector<2x3xi8>, %[[RHS:.*]]: vector<2x3xi8>)
func.func @interleave_2d(%a: vector<2x3xi8>, %b: vector<2x3xi8>) -> vector<2x6xi8> {
  // Only the presence of a shuffle and the absence of a leftover 2-D
  // vector.interleave after it are verified.
  // CHECK: llvm.shufflevector
  // CHECK-NOT: vector.interleave {{.*}} : vector<2x3xi8>
  %zipped = vector.interleave %a, %b : vector<2x3xi8> -> vector<2x6xi8>
  return %zipped : vector<2x6xi8>
}
4009
4010// -----
4011
4012// CHECK-LABEL: @interleave_2d_scalable
4013//  CHECK-SAME:     %[[LHS:.*]]: vector<2x[8]xi16>, %[[RHS:.*]]: vector<2x[8]xi16>)
func.func @interleave_2d_scalable(%a: vector<2x[8]xi16>, %b: vector<2x[8]xi16>) -> vector<2x[16]xi16> {
  // Only the presence of the interleave2 intrinsic and the absence of a
  // leftover 2-D vector.interleave after it are verified.
  // CHECK: llvm.intr.vector.interleave2
  // CHECK-NOT: vector.interleave {{.*}} : vector<2x[8]xi16>
  %zipped = vector.interleave %a, %b : vector<2x[8]xi16> -> vector<2x[16]xi16>
  return %zipped : vector<2x[16]xi16>
}
4020
4021// -----
4022
4023//===----------------------------------------------------------------------===//
4024// vector.deinterleave
4025//===----------------------------------------------------------------------===//
4026
4027// CHECK-LABEL: @deinterleave_1d
4028// CHECK-SAME:  (%[[ARG:.*]]: vector<4xi32>) -> (vector<2xi32>, vector<2xi32>)
func.func @deinterleave_1d(%arg: vector<4xi32>) -> (vector<2xi32>, vector<2xi32>) {
  // Fixed-width 1-D deinterleave: expected to become two shuffles against a
  // poison vector, selecting the even lanes and then the odd lanes.
  // CHECK: %[[POISON:.*]] = llvm.mlir.poison : vector<4xi32>
  // CHECK: llvm.shufflevector %[[ARG]], %[[POISON]] [0, 2] : vector<4xi32>
  // CHECK: llvm.shufflevector %[[ARG]], %[[POISON]] [1, 3] : vector<4xi32>
  %even, %odd = vector.deinterleave %arg : vector<4xi32> -> vector<2xi32>
  return %even, %odd : vector<2xi32>, vector<2xi32>
}
4036
4037// -----
4038
4039// CHECK-LABEL: @deinterleave_1d_scalable
4040// CHECK-SAME:  %[[ARG:.*]]: vector<[4]xi32>) -> (vector<[2]xi32>, vector<[2]xi32>)
func.func @deinterleave_1d_scalable(%arg: vector<[4]xi32>) -> (vector<[2]xi32>, vector<[2]xi32>) {
  // Scalable 1-D deinterleave: expected to call the deinterleave2 intrinsic and
  // unpack both halves from the returned struct.
  // CHECK: %[[RES:.*]] = "llvm.intr.vector.deinterleave2"(%[[ARG]]) : (vector<[4]xi32>) -> !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)>
  // CHECK: llvm.extractvalue %[[RES]][0] : !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)>
  // CHECK: llvm.extractvalue %[[RES]][1] : !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)>
  %even, %odd = vector.deinterleave %arg : vector<[4]xi32> -> vector<[2]xi32>
  return %even, %odd : vector<[2]xi32>, vector<[2]xi32>
}
4048
4049// -----
4050
4051// CHECK-LABEL: @deinterleave_2d
4052// CHECK-SAME: %[[ARG:.*]]: vector<2x8xf32>) -> (vector<2x4xf32>, vector<2x4xf32>)
func.func @deinterleave_2d(%arg: vector<2x8xf32>) -> (vector<2x4xf32>, vector<2x4xf32>) {
  // Only the presence of a shuffle and the absence of a leftover 2-D
  // vector.deinterleave after it are verified.
  // CHECK: llvm.shufflevector
  // CHECK-NOT: vector.deinterleave %{{.*}} : vector<2x8xf32>
  %even, %odd = vector.deinterleave %arg : vector<2x8xf32> -> vector<2x4xf32>
  return %even, %odd : vector<2x4xf32>, vector<2x4xf32>
}
4059
4060// -----
4061
// CHECK-LABEL: @deinterleave_2d_scalable
// CHECK-SAME: %[[ARG:.*]]: vector<2x[8]xf32>) -> (vector<2x[4]xf32>, vector<2x[4]xf32>)
func.func @deinterleave_2d_scalable(%arg: vector<2x[8]xf32>) -> (vector<2x[4]xf32>, vector<2x[4]xf32>) {
  // The label above anchors these directives to this function's output, so a
  // failure is reported against this test and not the preceding one. Only the
  // presence of the deinterleave2 intrinsic and the absence of a leftover 2-D
  // vector.deinterleave after it are verified.
  // CHECK: llvm.intr.vector.deinterleave2
  // CHECK-NOT: vector.deinterleave %{{.*}} : vector<2x[8]xf32>
  %0, %1 = vector.deinterleave %arg : vector<2x[8]xf32> -> vector<2x[4]xf32>
  return %0, %1 : vector<2x[4]xf32>, vector<2x[4]xf32>
}
4068
4069// -----
4070
4071//===----------------------------------------------------------------------===//
4072// vector.from_elements
4073//===----------------------------------------------------------------------===//
4074
4075// CHECK-LABEL: func.func @from_elements_1d(
4076//  CHECK-SAME:     %[[ARG_0:.*]]: f32, %[[ARG_1:.*]]: f32)
4077//       CHECK:   %[[UNDEF:.*]] = llvm.mlir.undef : vector<3xf32>
4078//       CHECK:   %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
4079//       CHECK:   %[[INSERT0:.*]] = llvm.insertelement %[[ARG_0]], %[[UNDEF]][%[[C0]] : i64] : vector<3xf32>
4080//       CHECK:   %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
4081//       CHECK:   %[[INSERT1:.*]] = llvm.insertelement %[[ARG_1]], %[[INSERT0]][%[[C1]] : i64] : vector<3xf32>
4082//       CHECK:   %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64
4083//       CHECK:   %[[INSERT2:.*]] = llvm.insertelement %[[ARG_0]], %[[INSERT1]][%[[C2]] : i64] : vector<3xf32>
4084//       CHECK:   return %[[INSERT2]]
func.func @from_elements_1d(%arg0: f32, %arg1: f32) -> vector<3xf32> {
  // Builds a 3-element vector; %arg0 is used for both lane 0 and lane 2.
  %vec = vector.from_elements %arg0, %arg1, %arg0 : vector<3xf32>
  return %vec : vector<3xf32>
}
4089
4090// -----
4091
4092// CHECK-LABEL: func.func @from_elements_0d(
4093//  CHECK-SAME:     %[[ARG_0:.*]]: f32)
4094//       CHECK:   %[[UNDEF:.*]] = llvm.mlir.undef : vector<1xf32>
4095//       CHECK:   %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
4096//       CHECK:   %[[INSERT0:.*]] = llvm.insertelement %[[ARG_0]], %[[UNDEF]][%[[C0]] : i64] : vector<1xf32>
4097//       CHECK:   %[[CAST:.*]] = builtin.unrealized_conversion_cast %[[INSERT0]] : vector<1xf32> to vector<f32>
4098//       CHECK:   return %[[CAST]]
func.func @from_elements_0d(%arg0: f32) -> vector<f32> {
  // Builds a rank-0 vector from a single scalar.
  %vec = vector.from_elements %arg0 : vector<f32>
  return %vec : vector<f32>
}
4103
4104// -----
4105
4106//===----------------------------------------------------------------------===//
4107// vector.step
4108//===----------------------------------------------------------------------===//
4109
4110// CHECK-LABEL: @step
4111// CHECK: %[[CST:.+]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
4112// CHECK: return %[[CST]] : vector<4xindex>
func.func @step() -> vector<4xindex> {
  // Fixed-width step: expected to become the dense constant [0, 1, 2, 3].
  %steps = vector.step : vector<4xindex>
  return %steps : vector<4xindex>
}
4117
4118// -----
4119
4120// CHECK-LABEL: @step_scalable
4121// CHECK: %[[STEPVECTOR:.*]] = llvm.intr.stepvector : vector<[4]xi64>
4122// CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %[[STEPVECTOR]] : vector<[4]xi64> to vector<[4]xindex>
4123// CHECK: return %[[CAST]] : vector<[4]xindex>
func.func @step_scalable() -> vector<[4]xindex> {
  // Scalable step: expected to use llvm.intr.stepvector on i64 and cast the
  // result back to index elements.
  %steps = vector.step : vector<[4]xindex>
  return %steps : vector<[4]xindex>
}
4128