; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV64

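; The checks below reflect the current lowering of the
; llvm.experimental.vector.extract.last.active intrinsic: build a masked vid.v
; (per-lane indices under the mask), reduce with vredmaxu.vs to find the
; highest active index, mask that index to 8 bits, then extract the selected
; lane with vslidedown.vx plus vmv.x.s (or vfmv.f.s for FP results). If
; vcpop.m reports no active lanes, the branch skips the extraction and the
; passthru value already held in the return register is used instead.
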
define i8 @extract_last_i8(<16 x i8> %data, <16 x i8> %mask, i8 %passthru) {
; CHECK-LABEL: extract_last_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vmsne.vi v0, v9, 0
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vcpop.m a1, v0
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    beqz a1, .LBB0_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    vredmaxu.vs v9, v9, v9
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    andi a0, a0, 255
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    ret
  %notzero = icmp ne <16 x i8> %mask, zeroinitializer
  %res = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> %data, <16 x i1> %notzero, i8 %passthru)
  ret i8 %res
}

define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) {
; CHECK-LABEL: extract_last_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmsne.vi v0, v9, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vcpop.m a1, v0
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    beqz a1, .LBB1_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    vredmaxu.vs v9, v9, v9
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    andi a0, a0, 255
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:  .LBB1_2:
; CHECK-NEXT:    ret
  %notzero = icmp ne <8 x i16> %mask, zeroinitializer
  %res = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> %data, <8 x i1> %notzero, i16 %passthru)
  ret i16 %res
}

define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) {
; CHECK-LABEL: extract_last_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmsne.vi v0, v9, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vcpop.m a1, v0
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    beqz a1, .LBB2_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    vredmaxu.vs v9, v9, v9
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    andi a0, a0, 255
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:  .LBB2_2:
; CHECK-NEXT:    ret
  %notzero = icmp ne <4 x i32> %mask, zeroinitializer
  %res = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %data, <4 x i1> %notzero, i32 %passthru)
  ret i32 %res
}

define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) {
; RV32-LABEL: extract_last_i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vmsne.vi v0, v9, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
; RV32-NEXT:    vmv.v.i v9, 0
; RV32-NEXT:    vcpop.m a2, v0
; RV32-NEXT:    vid.v v9, v0.t
; RV32-NEXT:    beqz a2, .LBB3_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    vredmaxu.vs v9, v9, v9
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    andi a0, a0, 255
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV32-NEXT:    vslidedown.vx v8, v8, a0
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:  .LBB3_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: extract_last_i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmsne.vi v0, v9, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
; RV64-NEXT:    vmv.v.i v9, 0
; RV64-NEXT:    vcpop.m a1, v0
; RV64-NEXT:    vid.v v9, v0.t
; RV64-NEXT:    beqz a1, .LBB3_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    vredmaxu.vs v9, v9, v9
; RV64-NEXT:    vmv.x.s a0, v9
; RV64-NEXT:    andi a0, a0, 255
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT:    vslidedown.vx v8, v8, a0
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:  .LBB3_2:
; RV64-NEXT:    ret
  %notzero = icmp ne <2 x i64> %mask, zeroinitializer
  %res = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> %data, <2 x i1> %notzero, i64 %passthru)
  ret i64 %res
}

define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %passthru) {
; CHECK-LABEL: extract_last_float:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmsne.vi v0, v9, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vcpop.m a0, v0
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    beqz a0, .LBB4_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    vredmaxu.vs v9, v9, v9
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    andi a0, a0, 255
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:  .LBB4_2:
; CHECK-NEXT:    ret
  %notzero = icmp ne <4 x i32> %mask, zeroinitializer
  %res = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> %data, <4 x i1> %notzero, float %passthru)
  ret float %res
}

define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double %passthru) {
; CHECK-LABEL: extract_last_double:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmsne.vi v0, v9, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vcpop.m a0, v0
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    beqz a0, .LBB5_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    vredmaxu.vs v9, v9, v9
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    andi a0, a0, 255
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:  .LBB5_2:
; CHECK-NEXT:    ret
  %notzero = icmp ne <2 x i64> %mask, zeroinitializer
  %res = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> %data, <2 x i1> %notzero, double %passthru)
  ret double %res
}

define i8 @extract_last_i8_scalable(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru) {
; CHECK-LABEL: extract_last_i8_scalable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vcpop.m a1, v0
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    beqz a1, .LBB6_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    vredmaxu.vs v10, v10, v10
; CHECK-NEXT:    vmv.x.s a0, v10
; CHECK-NEXT:    andi a0, a0, 255
; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:  .LBB6_2:
; CHECK-NEXT:    ret
  %res = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru)
  ret i8 %res
}

define i16 @extract_last_i16_scalable(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru) {
; CHECK-LABEL: extract_last_i16_scalable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vcpop.m a1, v0
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    beqz a1, .LBB7_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    vredmaxu.vs v10, v10, v10
; CHECK-NEXT:    vmv.x.s a0, v10
; CHECK-NEXT:    andi a0, a0, 255
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:  .LBB7_2:
; CHECK-NEXT:    ret
  %res = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru)
  ret i16 %res
}

define i32 @extract_last_i32_scalable(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru) {
; CHECK-LABEL: extract_last_i32_scalable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vcpop.m a1, v0
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    beqz a1, .LBB8_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    vredmaxu.vs v10, v10, v10
; CHECK-NEXT:    vmv.x.s a0, v10
; CHECK-NEXT:    andi a0, a0, 255
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:  .LBB8_2:
; CHECK-NEXT:    ret
  %res = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru)
  ret i32 %res
}

define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru) {
; RV32-LABEL: extract_last_i64_scalable:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT:    vmv.v.i v10, 0
; RV32-NEXT:    vcpop.m a2, v0
; RV32-NEXT:    vid.v v10, v0.t
; RV32-NEXT:    beqz a2, .LBB9_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    vredmaxu.vs v10, v10, v10
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    andi a0, a0, 255
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vx v8, v8, a0
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:  .LBB9_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: extract_last_i64_scalable:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV64-NEXT:    vmv.v.i v10, 0
; RV64-NEXT:    vcpop.m a1, v0
; RV64-NEXT:    vid.v v10, v0.t
; RV64-NEXT:    beqz a1, .LBB9_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    vredmaxu.vs v10, v10, v10
; RV64-NEXT:    vmv.x.s a0, v10
; RV64-NEXT:    andi a0, a0, 255
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vx v8, v8, a0
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:  .LBB9_2:
; RV64-NEXT:    ret
  %res = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru)
  ret i64 %res
}

define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru) {
; CHECK-LABEL: extract_last_float_scalable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vcpop.m a0, v0
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    beqz a0, .LBB10_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    vredmaxu.vs v10, v10, v10
; CHECK-NEXT:    vmv.x.s a0, v10
; CHECK-NEXT:    andi a0, a0, 255
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:  .LBB10_2:
; CHECK-NEXT:    ret
  %res = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru)
  ret float %res
}

define double @extract_last_double_scalable(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru) {
; CHECK-LABEL: extract_last_double_scalable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vcpop.m a0, v0
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    beqz a0, .LBB11_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    vredmaxu.vs v10, v10, v10
; CHECK-NEXT:    vmv.x.s a0, v10
; CHECK-NEXT:    andi a0, a0, 255
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:  .LBB11_2:
; CHECK-NEXT:    ret
  %res = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru)
  ret double %res
}

declare i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8>, <16 x i1>, i8)
declare i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16>, <8 x i1>, i16)
declare i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32>, <4 x i1>, i32)
declare i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64>, <2 x i1>, i64)
declare float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float>, <4 x i1>, float)
declare double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double>, <2 x i1>, double)
declare i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
declare i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
declare i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
declare float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)
declare double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)