// RUN: mlir-opt %s -convert-vector-to-llvm -split-input-file | FileCheck %s
// RUN: mlir-opt %s -convert-vector-to-llvm='reassociate-fp-reductions' -split-input-file | FileCheck %s --check-prefix=REASSOC
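
/// Tests for lowering vector.reduction to LLVM intrinsics: unmasked
/// reductions become llvm.intr.vector.reduce.*, while reductions wrapped in
/// vector.mask become llvm.intr.vp.reduce.*. Under the
/// reassociate-fp-reductions option, floating-point reductions additionally
/// carry the reassoc fastmath flag (checked via the REASSOC prefix).
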
// CHECK-LABEL: @reduce_add_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
//      CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f32, vector<16xf32>) -> f32
//      CHECK: return %[[V]] : f32
//
// REASSOC-LABEL: @reduce_add_f32(
// REASSOC-SAME: %[[A:.*]]: vector<16xf32>)
//      REASSOC: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
//      REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// REASSOC-SAME: <{fastmathFlags = #llvm.fastmath<reassoc>}> : (f32, vector<16xf32>) -> f32
//      REASSOC: return %[[V]] : f32
//
func.func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 {
  %0 = vector.reduction <add>, %arg0 : vector<16xf32> into f32
  return %0 : f32
}

// -----

// CHECK-LABEL: @reduce_add_f32_always_reassoc(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
//      CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
/// Note: the reassoc flag remains even though the pass option
/// reassociate-fp-reductions is false for this run: the flag already set on
/// the op takes precedence. TODO: reconsider whether this flag really is a
/// property of the pass / pattern.
// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<reassoc>}> : (f32, vector<16xf32>) -> f32
//      CHECK: return %[[V]] : f32
//
// REASSOC-LABEL: @reduce_add_f32_always_reassoc(
// REASSOC-SAME: %[[A:.*]]: vector<16xf32>)
//      REASSOC: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
//      REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// REASSOC-SAME: <{fastmathFlags = #llvm.fastmath<reassoc>}> : (f32, vector<16xf32>) -> f32
//      REASSOC: return %[[V]] : f32
//
func.func @reduce_add_f32_always_reassoc(%arg0: vector<16xf32>) -> f32 {
  %0 = vector.reduction <add>, %arg0 fastmath<reassoc> : vector<16xf32> into f32
  return %0 : f32
}

// -----

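/// Fastmath flags already present on the op are preserved; with
/// reassociate-fp-reductions enabled, reassoc is appended to them
/// (nnan, ninf -> nnan, ninf, reassoc).
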
// CHECK-LABEL: @reduce_mul_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
//      CHECK: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
//      CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]])
// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<nnan, ninf>}> : (f32, vector<16xf32>) -> f32
//      CHECK: return %[[V]] : f32
//
// REASSOC-LABEL: @reduce_mul_f32(
// REASSOC-SAME: %[[A:.*]]: vector<16xf32>)
//      REASSOC: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
//      REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]])
// REASSOC-SAME: <{fastmathFlags = #llvm.fastmath<nnan, ninf, reassoc>}> : (f32, vector<16xf32>) -> f32
//      REASSOC: return %[[V]] : f32
//
func.func @reduce_mul_f32(%arg0: vector<16xf32>) -> f32 {
  %0 = vector.reduction <mul>, %arg0 fastmath<nnan, ninf> : vector<16xf32> into f32
  return %0 : f32
}

// -----

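/// Masked reductions (vector.reduction inside vector.mask) lower to the
/// llvm.intr.vp.reduce.* intrinsics, which take the reduction's neutral
/// element as the start value and the vector length as an explicit i32
/// operand alongside the mask.
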
func.func @masked_reduce_add_f32(%arg0: vector<16xf32>, %mask : vector<16xi1>) -> f32 {
  %0 = vector.mask %mask { vector.reduction <add>, %arg0 : vector<16xf32> into f32 } : vector<16xi1> -> f32
  return %0 : f32
}

// CHECK-LABEL:   func.func @masked_reduce_add_f32(
// CHECK-SAME:                              %[[INPUT:.*]]: vector<16xf32>,
// CHECK-SAME:                              %[[MASK:.*]]: vector<16xi1>) -> f32 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(16 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.fadd"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (f32, vector<16xf32>, vector<16xi1>, i32) -> f32


// -----

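/// For scalable vectors the vector length is not a compile-time constant:
/// it is the base length multiplied by the runtime llvm.intr.vscale value,
/// cast down to i32.
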
func.func @masked_reduce_add_f32_scalable(%arg0: vector<[16]xf32>, %mask : vector<[16]xi1>) -> f32 {
  %0 = vector.mask %mask { vector.reduction <add>, %arg0 : vector<[16]xf32> into f32 } : vector<[16]xi1> -> f32
  return %0 : f32
}

// CHECK-LABEL:   func.func @masked_reduce_add_f32_scalable(
// CHECK-SAME:                              %[[INPUT:.*]]: vector<[16]xf32>,
// CHECK-SAME:                              %[[MASK:.*]]: vector<[16]xi1>) -> f32 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
// CHECK:           %[[VL_BASE:.*]] = llvm.mlir.constant(16 : i32) : i32
// CHECK:           %[[VSCALE:.*]] = "llvm.intr.vscale"() : () -> i64
// CHECK:           %[[CAST_IDX:.*]] = builtin.unrealized_conversion_cast %[[VSCALE]] : i64 to index
// CHECK:           %[[CAST_I32:.*]] = arith.index_cast %[[CAST_IDX]] : index to i32
// CHECK:           %[[VL_MUL:.*]] = arith.muli %[[VL_BASE]], %[[CAST_I32]] : i32
// CHECK:           "llvm.intr.vp.reduce.fadd"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL_MUL]]) : (f32, vector<[16]xf32>, vector<[16]xi1>, i32) -> f32


// -----

func.func @masked_reduce_mul_f32(%arg0: vector<16xf32>, %mask : vector<16xi1>) -> f32 {
  %0 = vector.mask %mask { vector.reduction <mul>, %arg0 : vector<16xf32> into f32 } : vector<16xi1> -> f32
  return %0 : f32
}

// CHECK-LABEL:   func.func @masked_reduce_mul_f32(
// CHECK-SAME:                              %[[INPUT:.*]]: vector<16xf32>,
// CHECK-SAME:                              %[[MASK:.*]]: vector<16xi1>) -> f32 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(16 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.fmul"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (f32, vector<16xf32>, vector<16xi1>, i32) -> f32


// -----

func.func @masked_reduce_minf_f32(%arg0: vector<16xf32>, %mask : vector<16xi1>) -> f32 {
  %0 = vector.mask %mask { vector.reduction <minnumf>, %arg0 : vector<16xf32> into f32 } : vector<16xi1> -> f32
  return %0 : f32
}

// CHECK-LABEL:   func.func @masked_reduce_minf_f32(
// CHECK-SAME:                                      %[[INPUT:.*]]: vector<16xf32>,
// CHECK-SAME:                                      %[[MASK:.*]]: vector<16xi1>) -> f32 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(0xFFC00000 : f32) : f32
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(16 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.fmin"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (f32, vector<16xf32>, vector<16xi1>, i32) -> f32

// -----

func.func @masked_reduce_minf_f32_scalable(%arg0: vector<[16]xf32>, %mask : vector<[16]xi1>) -> f32 {
  %0 = vector.mask %mask { vector.reduction <minnumf>, %arg0 : vector<[16]xf32> into f32 } : vector<[16]xi1> -> f32
  return %0 : f32
}

// CHECK-LABEL:   func.func @masked_reduce_minf_f32_scalable(
// CHECK-SAME:                                      %[[INPUT:.*]]: vector<[16]xf32>,
// CHECK-SAME:                                      %[[MASK:.*]]: vector<[16]xi1>) -> f32 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(0xFFC00000 : f32) : f32
// CHECK:           %[[VL_BASE:.*]] = llvm.mlir.constant(16 : i32) : i32
// CHECK:           %[[VSCALE:.*]] = "llvm.intr.vscale"() : () -> i64
// CHECK:           %[[CAST_IDX:.*]] = builtin.unrealized_conversion_cast %[[VSCALE]] : i64 to index
// CHECK:           %[[CAST_I32:.*]] = arith.index_cast %[[CAST_IDX]] : index to i32
// CHECK:           %[[VL_MUL:.*]] = arith.muli %[[VL_BASE]], %[[CAST_I32]] : i32
// CHECK:           "llvm.intr.vp.reduce.fmin"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL_MUL]]) : (f32, vector<[16]xf32>, vector<[16]xi1>, i32) -> f32

// -----

func.func @masked_reduce_maxf_f32(%arg0: vector<16xf32>, %mask : vector<16xi1>) -> f32 {
  %0 = vector.mask %mask { vector.reduction <maxnumf>, %arg0 : vector<16xf32> into f32 } : vector<16xi1> -> f32
  return %0 : f32
}

// CHECK-LABEL:   func.func @masked_reduce_maxf_f32(
// CHECK-SAME:                                      %[[INPUT:.*]]: vector<16xf32>,
// CHECK-SAME:                                      %[[MASK:.*]]: vector<16xi1>) -> f32 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(0x7FC00000 : f32) : f32
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(16 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.fmax"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (f32, vector<16xf32>, vector<16xi1>, i32) -> f32

// -----

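/// maximumf / minimumf are lowered without a vp intrinsic: the mask is
/// applied first via llvm.select, substituting a neutral splat for inactive
/// lanes, and the result feeds the unmasked llvm.intr.vector.reduce.fmaximum
/// or fminimum.
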
func.func @masked_reduce_maximumf_f32(%arg0: vector<16xf32>, %mask : vector<16xi1>) -> f32 {
  %0 = vector.mask %mask { vector.reduction <maximumf>, %arg0 : vector<16xf32> into f32 } : vector<16xi1> -> f32
  return %0 : f32
}

// CHECK-LABEL:   func.func @masked_reduce_maximumf_f32(
// CHECK-SAME:                                      %[[INPUT:.*]]: vector<16xf32>,
// CHECK-SAME:                                      %[[MASK:.*]]: vector<16xi1>) -> f32 {
// CHECK:           %[[MASK_NEUTRAL:.*]] = llvm.mlir.constant(dense<-1.401300e-45> : vector<16xf32>) : vector<16xf32>
// CHECK:           %[[MASKED:.*]] = llvm.select %[[MASK]], %[[INPUT]], %[[MASK_NEUTRAL]] : vector<16xi1>, vector<16xf32>
// CHECK:           %[[RESULT:.*]] = llvm.intr.vector.reduce.fmaximum(%[[MASKED]])  : (vector<16xf32>) -> f32
// CHECK:           return %[[RESULT]]

// -----

func.func @masked_reduce_minimumf_f32(%arg0: vector<16xf32>, %mask : vector<16xi1>) -> f32 {
  %0 = vector.mask %mask { vector.reduction <minimumf>, %arg0 : vector<16xf32> into f32 } : vector<16xi1> -> f32
  return %0 : f32
}

// CHECK-LABEL:   func.func @masked_reduce_minimumf_f32(
// CHECK-SAME:                                      %[[INPUT:.*]]: vector<16xf32>,
// CHECK-SAME:                                      %[[MASK:.*]]: vector<16xi1>) -> f32 {
// CHECK:           %[[MASK_NEUTRAL:.*]] = llvm.mlir.constant(dense<3.40282347E+38> : vector<16xf32>) : vector<16xf32>
// CHECK:           %[[MASKED:.*]] = llvm.select %[[MASK]], %[[INPUT]], %[[MASK_NEUTRAL]] : vector<16xi1>, vector<16xf32>
// CHECK:           %[[RESULT:.*]] = llvm.intr.vector.reduce.fminimum(%[[MASKED]])  : (vector<16xf32>) -> f32
// CHECK:           return %[[RESULT]]

// -----

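/// Masked integer reductions follow the same vp.reduce pattern; the neutral
/// start value is the identity of the reduction: 0 for add, 1 for mul,
/// all-ones (-1) for umin, 0 for umax, and the signed extremes of the
/// element type (127 / -128 for i8) for smin / smax.
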
func.func @masked_reduce_add_i8(%arg0: vector<32xi8>, %mask : vector<32xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <add>, %arg0 : vector<32xi8> into i8 } : vector<32xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_add_i8(
// CHECK-SAME:                             %[[INPUT:.*]]: vector<32xi8>,
// CHECK-SAME:                             %[[MASK:.*]]: vector<32xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(0 : i8) : i8
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.add"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (i8, vector<32xi8>, vector<32xi1>, i32) -> i8


// -----

func.func @masked_reduce_add_i8_scalable(%arg0: vector<[32]xi8>, %mask : vector<[32]xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <add>, %arg0 : vector<[32]xi8> into i8 } : vector<[32]xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_add_i8_scalable(
// CHECK-SAME:                             %[[INPUT:.*]]: vector<[32]xi8>,
// CHECK-SAME:                             %[[MASK:.*]]: vector<[32]xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(0 : i8) : i8
// CHECK:           %[[VL_BASE:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           %[[VSCALE:.*]] = "llvm.intr.vscale"() : () -> i64
// CHECK:           %[[CAST_IDX:.*]] = builtin.unrealized_conversion_cast %[[VSCALE]] : i64 to index
// CHECK:           %[[CAST_I32:.*]] = arith.index_cast %[[CAST_IDX]] : index to i32
// CHECK:           %[[VL_MUL:.*]] = arith.muli %[[VL_BASE]], %[[CAST_I32]] : i32
// CHECK:           "llvm.intr.vp.reduce.add"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL_MUL]]) : (i8, vector<[32]xi8>, vector<[32]xi1>, i32) -> i8


// -----

func.func @masked_reduce_mul_i8(%arg0: vector<32xi8>, %mask : vector<32xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <mul>, %arg0 : vector<32xi8> into i8 } : vector<32xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_mul_i8(
// CHECK-SAME:                             %[[INPUT:.*]]: vector<32xi8>,
// CHECK-SAME:                             %[[MASK:.*]]: vector<32xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(1 : i8) : i8
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.mul"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (i8, vector<32xi8>, vector<32xi1>, i32) -> i8

// -----

func.func @masked_reduce_minui_i8(%arg0: vector<32xi8>, %mask : vector<32xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <minui>, %arg0 : vector<32xi8> into i8 } : vector<32xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_minui_i8(
// CHECK-SAME:                               %[[INPUT:.*]]: vector<32xi8>,
// CHECK-SAME:                               %[[MASK:.*]]: vector<32xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(-1 : i8) : i8
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.umin"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (i8, vector<32xi8>, vector<32xi1>, i32) -> i8

// -----

func.func @masked_reduce_minui_i8_scalable(%arg0: vector<[32]xi8>, %mask : vector<[32]xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <minui>, %arg0 : vector<[32]xi8> into i8 } : vector<[32]xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_minui_i8_scalable(
// CHECK-SAME:                               %[[INPUT:.*]]: vector<[32]xi8>,
// CHECK-SAME:                               %[[MASK:.*]]: vector<[32]xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(-1 : i8) : i8
// CHECK:           %[[VL_BASE:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           %[[VSCALE:.*]] = "llvm.intr.vscale"() : () -> i64
// CHECK:           %[[CAST_IDX:.*]] = builtin.unrealized_conversion_cast %[[VSCALE]] : i64 to index
// CHECK:           %[[CAST_I32:.*]] = arith.index_cast %[[CAST_IDX]] : index to i32
// CHECK:           %[[VL_MUL:.*]] = arith.muli %[[VL_BASE]], %[[CAST_I32]] : i32
// CHECK:           "llvm.intr.vp.reduce.umin"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL_MUL]]) : (i8, vector<[32]xi8>, vector<[32]xi1>, i32) -> i8

// -----

func.func @masked_reduce_maxui_i8(%arg0: vector<32xi8>, %mask : vector<32xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <maxui>, %arg0 : vector<32xi8> into i8 } : vector<32xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_maxui_i8(
// CHECK-SAME:                               %[[INPUT:.*]]: vector<32xi8>,
// CHECK-SAME:                               %[[MASK:.*]]: vector<32xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(0 : i8) : i8
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.umax"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (i8, vector<32xi8>, vector<32xi1>, i32) -> i8

// -----

func.func @masked_reduce_minsi_i8(%arg0: vector<32xi8>, %mask : vector<32xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <minsi>, %arg0 : vector<32xi8> into i8 } : vector<32xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_minsi_i8(
// CHECK-SAME:                               %[[INPUT:.*]]: vector<32xi8>,
// CHECK-SAME:                               %[[MASK:.*]]: vector<32xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(127 : i8) : i8
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.smin"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (i8, vector<32xi8>, vector<32xi1>, i32) -> i8

// -----

func.func @masked_reduce_maxsi_i8(%arg0: vector<32xi8>, %mask : vector<32xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <maxsi>, %arg0 : vector<32xi8> into i8 } : vector<32xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_maxsi_i8(
// CHECK-SAME:                               %[[INPUT:.*]]: vector<32xi8>,
// CHECK-SAME:                               %[[MASK:.*]]: vector<32xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(-128 : i8) : i8
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.smax"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (i8, vector<32xi8>, vector<32xi1>, i32) -> i8

// -----

func.func @masked_reduce_maxsi_i8_scalable(%arg0: vector<[32]xi8>, %mask : vector<[32]xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <maxsi>, %arg0 : vector<[32]xi8> into i8 } : vector<[32]xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_maxsi_i8_scalable(
// CHECK-SAME:                               %[[INPUT:.*]]: vector<[32]xi8>,
// CHECK-SAME:                               %[[MASK:.*]]: vector<[32]xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(-128 : i8) : i8
// CHECK:           %[[VL_BASE:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           %[[VSCALE:.*]] = "llvm.intr.vscale"() : () -> i64
// CHECK:           %[[CAST_IDX:.*]] = builtin.unrealized_conversion_cast %[[VSCALE]] : i64 to index
// CHECK:           %[[CAST_I32:.*]] = arith.index_cast %[[CAST_IDX]] : index to i32
// CHECK:           %[[VL_MUL:.*]] = arith.muli %[[VL_BASE]], %[[CAST_I32]] : i32
// CHECK:           "llvm.intr.vp.reduce.smax"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL_MUL]]) : (i8, vector<[32]xi8>, vector<[32]xi1>, i32) -> i8

// -----

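/// Bitwise reductions likewise use the operation's identity as the neutral
/// start value: 0 for or and xor, all-ones (-1) for and.
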
func.func @masked_reduce_or_i8(%arg0: vector<32xi8>, %mask : vector<32xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <or>, %arg0 : vector<32xi8> into i8 } : vector<32xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_or_i8(
// CHECK-SAME:                            %[[INPUT:.*]]: vector<32xi8>,
// CHECK-SAME:                            %[[MASK:.*]]: vector<32xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(0 : i8) : i8
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.or"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (i8, vector<32xi8>, vector<32xi1>, i32) -> i8


// -----

func.func @masked_reduce_and_i8(%arg0: vector<32xi8>, %mask : vector<32xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <and>, %arg0 : vector<32xi8> into i8 } : vector<32xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_and_i8(
// CHECK-SAME:                             %[[INPUT:.*]]: vector<32xi8>,
// CHECK-SAME:                             %[[MASK:.*]]: vector<32xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(-1 : i8) : i8
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.and"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (i8, vector<32xi8>, vector<32xi1>, i32) -> i8

// -----

func.func @masked_reduce_xor_i8(%arg0: vector<32xi8>, %mask : vector<32xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <xor>, %arg0 : vector<32xi8> into i8 } : vector<32xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_xor_i8(
// CHECK-SAME:                             %[[INPUT:.*]]: vector<32xi8>,
// CHECK-SAME:                             %[[MASK:.*]]: vector<32xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(0 : i8) : i8
// CHECK:           %[[VL:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           "llvm.intr.vp.reduce.xor"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL]]) : (i8, vector<32xi8>, vector<32xi1>, i32) -> i8

// -----

func.func @masked_reduce_xor_i8_scalable(%arg0: vector<[32]xi8>, %mask : vector<[32]xi1>) -> i8 {
  %0 = vector.mask %mask { vector.reduction <xor>, %arg0 : vector<[32]xi8> into i8 } : vector<[32]xi1> -> i8
  return %0 : i8
}

// CHECK-LABEL:   func.func @masked_reduce_xor_i8_scalable(
// CHECK-SAME:                             %[[INPUT:.*]]: vector<[32]xi8>,
// CHECK-SAME:                             %[[MASK:.*]]: vector<[32]xi1>) -> i8 {
// CHECK:           %[[NEUTRAL:.*]] = llvm.mlir.constant(0 : i8) : i8
// CHECK:           %[[VL_BASE:.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK:           %[[VSCALE:.*]] = "llvm.intr.vscale"() : () -> i64
// CHECK:           %[[CAST_IDX:.*]] = builtin.unrealized_conversion_cast %[[VSCALE]] : i64 to index
// CHECK:           %[[CAST_I32:.*]] = arith.index_cast %[[CAST_IDX]] : index to i32
// CHECK:           %[[VL_MUL:.*]] = arith.muli %[[VL_BASE]], %[[CAST_I32]] : i32
// CHECK:           "llvm.intr.vp.reduce.xor"(%[[NEUTRAL]], %[[INPUT]], %[[MASK]], %[[VL_MUL]]) : (i8, vector<[32]xi8>, vector<[32]xi1>, i32) -> i8