xref: /llvm-project/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll (revision d9f165ddea3223217a959c3cea3d2c613b132935)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,NEON-FIXED
; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SVE-FIXED

;; Extract the last active i8 lane from a fixed-width <16 x i8> vector, or
;; return %passthru when no mask lane is active.
define i8 @extract_last_i8(<16 x i8> %data, <16 x i8> %mask, i8 %passthru) {
; NEON-FIXED-LABEL: extract_last_i8:
; NEON-FIXED:       // %bb.0:
; NEON-FIXED-NEXT:    sub sp, sp, #16
; NEON-FIXED-NEXT:    .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT:    cmeq v2.16b, v1.16b, #0
; NEON-FIXED-NEXT:    adrp x8, .LCPI0_0
; NEON-FIXED-NEXT:    cmtst v1.16b, v1.16b, v1.16b
; NEON-FIXED-NEXT:    ldr q3, [x8, :lo12:.LCPI0_0]
; NEON-FIXED-NEXT:    mov x9, sp
; NEON-FIXED-NEXT:    str q0, [sp]
; NEON-FIXED-NEXT:    bic v2.16b, v3.16b, v2.16b
; NEON-FIXED-NEXT:    umaxv b1, v1.16b
; NEON-FIXED-NEXT:    umaxv b2, v2.16b
; NEON-FIXED-NEXT:    fmov w8, s2
; NEON-FIXED-NEXT:    bfxil x9, x8, #0, #4
; NEON-FIXED-NEXT:    ldrb w8, [x9]
; NEON-FIXED-NEXT:    fmov w9, s1
; NEON-FIXED-NEXT:    tst w9, #0x1
; NEON-FIXED-NEXT:    csel w0, w8, w0, ne
; NEON-FIXED-NEXT:    add sp, sp, #16
; NEON-FIXED-NEXT:    ret
;
; SVE-FIXED-LABEL: extract_last_i8:
; SVE-FIXED:       // %bb.0:
; SVE-FIXED-NEXT:    sub sp, sp, #16
; SVE-FIXED-NEXT:    .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT:    index z2.b, #0, #1
; SVE-FIXED-NEXT:    cmeq v3.16b, v1.16b, #0
; SVE-FIXED-NEXT:    cmtst v1.16b, v1.16b, v1.16b
; SVE-FIXED-NEXT:    mov x9, sp
; SVE-FIXED-NEXT:    str q0, [sp]
; SVE-FIXED-NEXT:    bic v2.16b, v2.16b, v3.16b
; SVE-FIXED-NEXT:    umaxv b1, v1.16b
; SVE-FIXED-NEXT:    umaxv b2, v2.16b
; SVE-FIXED-NEXT:    fmov w8, s2
; SVE-FIXED-NEXT:    bfxil x9, x8, #0, #4
; SVE-FIXED-NEXT:    ldrb w8, [x9]
; SVE-FIXED-NEXT:    fmov w9, s1
; SVE-FIXED-NEXT:    tst w9, #0x1
; SVE-FIXED-NEXT:    csel w0, w8, w0, ne
; SVE-FIXED-NEXT:    add sp, sp, #16
; SVE-FIXED-NEXT:    ret
  %notzero = icmp ne <16 x i8> %mask, zeroinitializer
  %res = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> %data, <16 x i1> %notzero, i8 %passthru)
  ret i8 %res
}

;; Extract the last active i16 lane from a fixed-width <8 x i16> vector, or
;; return %passthru when no mask lane is active.
define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) {
; NEON-FIXED-LABEL: extract_last_i16:
; NEON-FIXED:       // %bb.0:
; NEON-FIXED-NEXT:    sub sp, sp, #16
; NEON-FIXED-NEXT:    .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT:    cmtst v1.8h, v1.8h, v1.8h
; NEON-FIXED-NEXT:    adrp x8, .LCPI1_0
; NEON-FIXED-NEXT:    mov x9, sp
; NEON-FIXED-NEXT:    ldr d3, [x8, :lo12:.LCPI1_0]
; NEON-FIXED-NEXT:    str q0, [sp]
; NEON-FIXED-NEXT:    xtn v2.8b, v1.8h
; NEON-FIXED-NEXT:    umaxv h1, v1.8h
; NEON-FIXED-NEXT:    and v2.8b, v2.8b, v3.8b
; NEON-FIXED-NEXT:    umaxv b2, v2.8b
; NEON-FIXED-NEXT:    fmov w8, s2
; NEON-FIXED-NEXT:    bfi x9, x8, #1, #3
; NEON-FIXED-NEXT:    ldrh w8, [x9]
; NEON-FIXED-NEXT:    fmov w9, s1
; NEON-FIXED-NEXT:    tst w9, #0x1
; NEON-FIXED-NEXT:    csel w0, w8, w0, ne
; NEON-FIXED-NEXT:    add sp, sp, #16
; NEON-FIXED-NEXT:    ret
;
; SVE-FIXED-LABEL: extract_last_i16:
; SVE-FIXED:       // %bb.0:
; SVE-FIXED-NEXT:    sub sp, sp, #16
; SVE-FIXED-NEXT:    .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT:    cmtst v1.8h, v1.8h, v1.8h
; SVE-FIXED-NEXT:    index z3.b, #0, #1
; SVE-FIXED-NEXT:    mov x9, sp
; SVE-FIXED-NEXT:    str q0, [sp]
; SVE-FIXED-NEXT:    xtn v2.8b, v1.8h
; SVE-FIXED-NEXT:    umaxv h1, v1.8h
; SVE-FIXED-NEXT:    and v2.8b, v2.8b, v3.8b
; SVE-FIXED-NEXT:    umaxv b2, v2.8b
; SVE-FIXED-NEXT:    fmov w8, s2
; SVE-FIXED-NEXT:    bfi x9, x8, #1, #3
; SVE-FIXED-NEXT:    ldrh w8, [x9]
; SVE-FIXED-NEXT:    fmov w9, s1
; SVE-FIXED-NEXT:    tst w9, #0x1
; SVE-FIXED-NEXT:    csel w0, w8, w0, ne
; SVE-FIXED-NEXT:    add sp, sp, #16
; SVE-FIXED-NEXT:    ret
  %notzero = icmp ne <8 x i16> %mask, zeroinitializer
  %res = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> %data, <8 x i1> %notzero, i16 %passthru)
  ret i16 %res
}

;; Extract the last active i32 lane from a fixed-width <4 x i32> vector, or
;; return %passthru when no mask lane is active.
define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) {
; NEON-FIXED-LABEL: extract_last_i32:
; NEON-FIXED:       // %bb.0:
; NEON-FIXED-NEXT:    sub sp, sp, #16
; NEON-FIXED-NEXT:    .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT:    cmtst v1.4s, v1.4s, v1.4s
; NEON-FIXED-NEXT:    adrp x8, .LCPI2_0
; NEON-FIXED-NEXT:    mov x9, sp
; NEON-FIXED-NEXT:    ldr d3, [x8, :lo12:.LCPI2_0]
; NEON-FIXED-NEXT:    str q0, [sp]
; NEON-FIXED-NEXT:    xtn v2.4h, v1.4s
; NEON-FIXED-NEXT:    umaxv s1, v1.4s
; NEON-FIXED-NEXT:    and v2.8b, v2.8b, v3.8b
; NEON-FIXED-NEXT:    umaxv h2, v2.4h
; NEON-FIXED-NEXT:    fmov w8, s2
; NEON-FIXED-NEXT:    bfi x9, x8, #2, #2
; NEON-FIXED-NEXT:    ldr w8, [x9]
; NEON-FIXED-NEXT:    fmov w9, s1
; NEON-FIXED-NEXT:    tst w9, #0x1
; NEON-FIXED-NEXT:    csel w0, w8, w0, ne
; NEON-FIXED-NEXT:    add sp, sp, #16
; NEON-FIXED-NEXT:    ret
;
; SVE-FIXED-LABEL: extract_last_i32:
; SVE-FIXED:       // %bb.0:
; SVE-FIXED-NEXT:    sub sp, sp, #16
; SVE-FIXED-NEXT:    .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT:    cmtst v1.4s, v1.4s, v1.4s
; SVE-FIXED-NEXT:    index z3.h, #0, #1
; SVE-FIXED-NEXT:    mov x9, sp
; SVE-FIXED-NEXT:    str q0, [sp]
; SVE-FIXED-NEXT:    xtn v2.4h, v1.4s
; SVE-FIXED-NEXT:    umaxv s1, v1.4s
; SVE-FIXED-NEXT:    and v2.8b, v2.8b, v3.8b
; SVE-FIXED-NEXT:    umaxv h2, v2.4h
; SVE-FIXED-NEXT:    fmov w8, s2
; SVE-FIXED-NEXT:    bfi x9, x8, #2, #2
; SVE-FIXED-NEXT:    ldr w8, [x9]
; SVE-FIXED-NEXT:    fmov w9, s1
; SVE-FIXED-NEXT:    tst w9, #0x1
; SVE-FIXED-NEXT:    csel w0, w8, w0, ne
; SVE-FIXED-NEXT:    add sp, sp, #16
; SVE-FIXED-NEXT:    ret
  %notzero = icmp ne <4 x i32> %mask, zeroinitializer
  %res = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %data, <4 x i1> %notzero, i32 %passthru)
  ret i32 %res
}

;; Extract the last active i64 lane from a fixed-width <2 x i64> vector, or
;; return %passthru when no mask lane is active.
define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) {
; NEON-FIXED-LABEL: extract_last_i64:
; NEON-FIXED:       // %bb.0:
; NEON-FIXED-NEXT:    sub sp, sp, #16
; NEON-FIXED-NEXT:    .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT:    cmtst v1.2d, v1.2d, v1.2d
; NEON-FIXED-NEXT:    adrp x8, .LCPI3_0
; NEON-FIXED-NEXT:    mov x9, sp
; NEON-FIXED-NEXT:    ldr d3, [x8, :lo12:.LCPI3_0]
; NEON-FIXED-NEXT:    str q0, [sp]
; NEON-FIXED-NEXT:    xtn v2.2s, v1.2d
; NEON-FIXED-NEXT:    umaxv s1, v1.4s
; NEON-FIXED-NEXT:    and v2.8b, v2.8b, v3.8b
; NEON-FIXED-NEXT:    umaxp v2.2s, v2.2s, v2.2s
; NEON-FIXED-NEXT:    fmov w8, s2
; NEON-FIXED-NEXT:    bfi x9, x8, #3, #1
; NEON-FIXED-NEXT:    ldr x8, [x9]
; NEON-FIXED-NEXT:    fmov w9, s1
; NEON-FIXED-NEXT:    tst w9, #0x1
; NEON-FIXED-NEXT:    csel x0, x8, x0, ne
; NEON-FIXED-NEXT:    add sp, sp, #16
; NEON-FIXED-NEXT:    ret
;
; SVE-FIXED-LABEL: extract_last_i64:
; SVE-FIXED:       // %bb.0:
; SVE-FIXED-NEXT:    sub sp, sp, #16
; SVE-FIXED-NEXT:    .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT:    cmtst v1.2d, v1.2d, v1.2d
; SVE-FIXED-NEXT:    index z3.s, #0, #1
; SVE-FIXED-NEXT:    mov x9, sp
; SVE-FIXED-NEXT:    str q0, [sp]
; SVE-FIXED-NEXT:    xtn v2.2s, v1.2d
; SVE-FIXED-NEXT:    umaxv s1, v1.4s
; SVE-FIXED-NEXT:    and v2.8b, v2.8b, v3.8b
; SVE-FIXED-NEXT:    umaxp v2.2s, v2.2s, v2.2s
; SVE-FIXED-NEXT:    fmov w8, s2
; SVE-FIXED-NEXT:    bfi x9, x8, #3, #1
; SVE-FIXED-NEXT:    ldr x8, [x9]
; SVE-FIXED-NEXT:    fmov w9, s1
; SVE-FIXED-NEXT:    tst w9, #0x1
; SVE-FIXED-NEXT:    csel x0, x8, x0, ne
; SVE-FIXED-NEXT:    add sp, sp, #16
; SVE-FIXED-NEXT:    ret
  %notzero = icmp ne <2 x i64> %mask, zeroinitializer
  %res = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> %data, <2 x i1> %notzero, i64 %passthru)
  ret i64 %res
}

;; Extract the last active float lane from a fixed-width <4 x float> vector, or
;; return %passthru when no mask lane is active.
define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %passthru) {
; NEON-FIXED-LABEL: extract_last_float:
; NEON-FIXED:       // %bb.0:
; NEON-FIXED-NEXT:    sub sp, sp, #16
; NEON-FIXED-NEXT:    .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT:    cmtst v1.4s, v1.4s, v1.4s
; NEON-FIXED-NEXT:    adrp x8, .LCPI4_0
; NEON-FIXED-NEXT:    mov x9, sp
; NEON-FIXED-NEXT:    ldr d4, [x8, :lo12:.LCPI4_0]
; NEON-FIXED-NEXT:    str q0, [sp]
; NEON-FIXED-NEXT:    xtn v3.4h, v1.4s
; NEON-FIXED-NEXT:    umaxv s1, v1.4s
; NEON-FIXED-NEXT:    and v3.8b, v3.8b, v4.8b
; NEON-FIXED-NEXT:    umaxv h3, v3.4h
; NEON-FIXED-NEXT:    fmov w8, s3
; NEON-FIXED-NEXT:    bfi x9, x8, #2, #2
; NEON-FIXED-NEXT:    fmov w8, s1
; NEON-FIXED-NEXT:    ldr s0, [x9]
; NEON-FIXED-NEXT:    tst w8, #0x1
; NEON-FIXED-NEXT:    fcsel s0, s0, s2, ne
; NEON-FIXED-NEXT:    add sp, sp, #16
; NEON-FIXED-NEXT:    ret
;
; SVE-FIXED-LABEL: extract_last_float:
; SVE-FIXED:       // %bb.0:
; SVE-FIXED-NEXT:    sub sp, sp, #16
; SVE-FIXED-NEXT:    .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT:    cmtst v1.4s, v1.4s, v1.4s
; SVE-FIXED-NEXT:    index z4.h, #0, #1
; SVE-FIXED-NEXT:    mov x9, sp
; SVE-FIXED-NEXT:    str q0, [sp]
; SVE-FIXED-NEXT:    xtn v3.4h, v1.4s
; SVE-FIXED-NEXT:    umaxv s1, v1.4s
; SVE-FIXED-NEXT:    and v3.8b, v3.8b, v4.8b
; SVE-FIXED-NEXT:    umaxv h3, v3.4h
; SVE-FIXED-NEXT:    fmov w8, s3
; SVE-FIXED-NEXT:    bfi x9, x8, #2, #2
; SVE-FIXED-NEXT:    fmov w8, s1
; SVE-FIXED-NEXT:    ldr s0, [x9]
; SVE-FIXED-NEXT:    tst w8, #0x1
; SVE-FIXED-NEXT:    fcsel s0, s0, s2, ne
; SVE-FIXED-NEXT:    add sp, sp, #16
; SVE-FIXED-NEXT:    ret
  %notzero = icmp ne <4 x i32> %mask, zeroinitializer
  %res = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> %data, <4 x i1> %notzero, float %passthru)
  ret float %res
}

;; Extract the last active double lane from a fixed-width <2 x double> vector,
;; or return %passthru when no mask lane is active.
define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double %passthru) {
; NEON-FIXED-LABEL: extract_last_double:
; NEON-FIXED:       // %bb.0:
; NEON-FIXED-NEXT:    sub sp, sp, #16
; NEON-FIXED-NEXT:    .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT:    cmtst v1.2d, v1.2d, v1.2d
; NEON-FIXED-NEXT:    adrp x8, .LCPI5_0
; NEON-FIXED-NEXT:    mov x9, sp
; NEON-FIXED-NEXT:    ldr d4, [x8, :lo12:.LCPI5_0]
; NEON-FIXED-NEXT:    str q0, [sp]
; NEON-FIXED-NEXT:    xtn v3.2s, v1.2d
; NEON-FIXED-NEXT:    umaxv s1, v1.4s
; NEON-FIXED-NEXT:    and v3.8b, v3.8b, v4.8b
; NEON-FIXED-NEXT:    umaxp v3.2s, v3.2s, v3.2s
; NEON-FIXED-NEXT:    fmov w8, s3
; NEON-FIXED-NEXT:    bfi x9, x8, #3, #1
; NEON-FIXED-NEXT:    fmov w8, s1
; NEON-FIXED-NEXT:    ldr d0, [x9]
; NEON-FIXED-NEXT:    tst w8, #0x1
; NEON-FIXED-NEXT:    fcsel d0, d0, d2, ne
; NEON-FIXED-NEXT:    add sp, sp, #16
; NEON-FIXED-NEXT:    ret
;
; SVE-FIXED-LABEL: extract_last_double:
; SVE-FIXED:       // %bb.0:
; SVE-FIXED-NEXT:    sub sp, sp, #16
; SVE-FIXED-NEXT:    .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT:    cmtst v1.2d, v1.2d, v1.2d
; SVE-FIXED-NEXT:    index z4.s, #0, #1
; SVE-FIXED-NEXT:    mov x9, sp
; SVE-FIXED-NEXT:    str q0, [sp]
; SVE-FIXED-NEXT:    xtn v3.2s, v1.2d
; SVE-FIXED-NEXT:    umaxv s1, v1.4s
; SVE-FIXED-NEXT:    and v3.8b, v3.8b, v4.8b
; SVE-FIXED-NEXT:    umaxp v3.2s, v3.2s, v3.2s
; SVE-FIXED-NEXT:    fmov w8, s3
; SVE-FIXED-NEXT:    bfi x9, x8, #3, #1
; SVE-FIXED-NEXT:    fmov w8, s1
; SVE-FIXED-NEXT:    ldr d0, [x9]
; SVE-FIXED-NEXT:    tst w8, #0x1
; SVE-FIXED-NEXT:    fcsel d0, d0, d2, ne
; SVE-FIXED-NEXT:    add sp, sp, #16
; SVE-FIXED-NEXT:    ret
  %notzero = icmp ne <2 x i64> %mask, zeroinitializer
  %res = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> %data, <2 x i1> %notzero, double %passthru)
  ret double %res
}

;; Scalable-vector (SVE) variant: extract the last active i8 lane, or return
;; %passthru when the predicate has no active lanes.
define i8 @extract_last_i8_scalable(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru) #0 {
; CHECK-LABEL: extract_last_i8_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.b, #0, #1
; CHECK-NEXT:    mov z2.b, #0 // =0x0
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    sel z1.b, p0, z1.b, z2.b
; CHECK-NEXT:    umaxv b1, p1, z1.b
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    and x8, x8, #0xff
; CHECK-NEXT:    whilels p1.b, xzr, x8
; CHECK-NEXT:    ptest p0, p0.b
; CHECK-NEXT:    lastb w8, p1, z0.b
; CHECK-NEXT:    csel w0, w8, w0, ne
; CHECK-NEXT:    ret
  %res = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru)
  ret i8 %res
}

;; Scalable-vector (SVE) variant: extract the last active i16 lane, or return
;; %passthru when the predicate has no active lanes.
define i16 @extract_last_i16_scalable(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru) #0 {
; CHECK-LABEL: extract_last_i16_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.h, #0, #1
; CHECK-NEXT:    mov z2.h, #0 // =0x0
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    sel z1.h, p0, z1.h, z2.h
; CHECK-NEXT:    umaxv h1, p1, z1.h
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    whilels p2.h, xzr, x8
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    lastb w8, p2, z0.h
; CHECK-NEXT:    csel w0, w8, w0, ne
; CHECK-NEXT:    ret
  %res = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru)
  ret i16 %res
}

;; Scalable-vector (SVE) variant: extract the last active i32 lane, or return
;; %passthru when the predicate has no active lanes.
define i32 @extract_last_i32_scalable(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru) #0 {
; CHECK-LABEL: extract_last_i32_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.s, #0, #1
; CHECK-NEXT:    mov z2.s, #0 // =0x0
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    sel z1.s, p0, z1.s, z2.s
; CHECK-NEXT:    umaxv s1, p1, z1.s
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    mov w8, w8
; CHECK-NEXT:    whilels p2.s, xzr, x8
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    lastb w8, p2, z0.s
; CHECK-NEXT:    csel w0, w8, w0, ne
; CHECK-NEXT:    ret
  %res = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru)
  ret i32 %res
}

;; Scalable-vector (SVE) variant: extract the last active i64 lane, or return
;; %passthru when the predicate has no active lanes.
define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru) #0 {
; CHECK-LABEL: extract_last_i64_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    mov z2.d, #0 // =0x0
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    sel z1.d, p0, z1.d, z2.d
; CHECK-NEXT:    umaxv d1, p1, z1.d
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    whilels p2.d, xzr, x8
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    lastb x8, p2, z0.d
; CHECK-NEXT:    csel x0, x8, x0, ne
; CHECK-NEXT:    ret
  %res = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru)
  ret i64 %res
}

;; Scalable-vector (SVE) variant: extract the last active float lane, or return
;; %passthru when the predicate has no active lanes.
define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru) #0 {
; CHECK-LABEL: extract_last_float_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z2.s, #0, #1
; CHECK-NEXT:    mov z3.s, #0 // =0x0
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    sel z2.s, p0, z2.s, z3.s
; CHECK-NEXT:    umaxv s2, p1, z2.s
; CHECK-NEXT:    fmov w8, s2
; CHECK-NEXT:    mov w8, w8
; CHECK-NEXT:    whilels p2.s, xzr, x8
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    lastb s0, p2, z0.s
; CHECK-NEXT:    fcsel s0, s0, s1, ne
; CHECK-NEXT:    ret
  %res = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru)
  ret float %res
}

;; Scalable-vector (SVE) variant: extract the last active double lane, or
;; return %passthru when the predicate has no active lanes.
define double @extract_last_double_scalable(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru) #0 {
; CHECK-LABEL: extract_last_double_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z2.d, #0, #1
; CHECK-NEXT:    mov z3.d, #0 // =0x0
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    sel z2.d, p0, z2.d, z3.d
; CHECK-NEXT:    umaxv d2, p1, z2.d
; CHECK-NEXT:    fmov x8, d2
; CHECK-NEXT:    whilels p2.d, xzr, x8
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    lastb d0, p2, z0.d
; CHECK-NEXT:    fcsel d0, d0, d1, ne
; CHECK-NEXT:    ret
  %res = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru)
  ret double %res
}

;; If the passthru parameter is poison, we shouldn't see a select at the end.
define i8 @extract_last_i8_scalable_poison_passthru(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: extract_last_i8_scalable_poison_passthru:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.b, #0, #1
; CHECK-NEXT:    mov z2.b, #0 // =0x0
; CHECK-NEXT:    sel z1.b, p0, z1.b, z2.b
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    umaxv b1, p0, z1.b
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    and x8, x8, #0xff
; CHECK-NEXT:    whilels p0.b, xzr, x8
; CHECK-NEXT:    lastb w0, p0, z0.b
; CHECK-NEXT:    ret
  %res = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 poison)
  ret i8 %res
}

;; Intrinsic declarations for the fixed-width and scalable variants exercised
;; above, plus the SVE target attributes shared by the scalable tests.
declare i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8>, <16 x i1>, i8)
declare i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16>, <8 x i1>, i16)
declare i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32>, <4 x i1>, i32)
declare i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64>, <2 x i1>, i64)
declare float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float>, <4 x i1>, float)
declare double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double>, <2 x i1>, double)
declare i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
declare i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
declare i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
declare float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)
declare double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)

attributes #0 = { "target-features"="+sve" vscale_range(1, 16) }