; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s -o - | FileCheck %s

; Extracting a legal fixed-length vector from an illegal scalable vector

define <4 x i32> @extract_v4i32_nxv16i32_12(<vscale x 16 x i32> %arg) {
; CHECK-LABEL: extract_v4i32_nxv16i32_12:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-4
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    ldr q0, [sp, #48]
; CHECK-NEXT:    addvl sp, sp, #4
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %ext = call <4 x i32> @llvm.vector.extract.v4i32.nxv16i32(<vscale x 16 x i32> %arg, i64 12)
  ret <4 x i32> %ext
}
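
; The whole nxv16i32 value (z0-z3) is spilled contiguously, so element i lives
; at byte offset 4*i from sp regardless of VL. Index 12 therefore maps to the
; fixed offset 12 * 4 = 48 used by the `ldr q0, [sp, #48]` reload, and the
; offset is always in range because indices 12-15 fall within the minimum
; element count of 16.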

define <8 x i16> @extract_v8i16_nxv32i16_8(<vscale x 32 x i16> %arg) {
; CHECK-LABEL: extract_v8i16_nxv32i16_8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    ldr q0, [sp, #16]
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %ext = call <8 x i16> @llvm.vector.extract.v8i16.nxv32i16(<vscale x 32 x i16> %arg, i64 8)
  ret <8 x i16> %ext
}

define <4 x i16> @extract_v4i16_nxv32i16_8(<vscale x 32 x i16> %arg) {
; CHECK-LABEL: extract_v4i16_nxv32i16_8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-4
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    st1h { z3.h }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1h { z2.h }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    ldr d0, [sp, #32]
; CHECK-NEXT:    addvl sp, sp, #4
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %ext = call <4 x i16> @llvm.vector.extract.v4i16.nxv32i16(<vscale x 32 x i16> %arg, i64 16)
  ret <4 x i16> %ext
}

; The result type gets promoted, leading to us extracting 2 elements from an nxv32i16.
; Hence we don't end up in SplitVecOp_EXTRACT_SUBVECTOR, but in SplitVecOp_EXTRACT_VECTOR_ELT instead.
define <2 x i16> @extract_v2i16_nxv32i16_8(<vscale x 32 x i16> %arg) {
; CHECK-LABEL: extract_v2i16_nxv32i16_8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-8
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    add x8, x8, #32
; CHECK-NEXT:    st1h { z3.h }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1h { z2.h }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    st1h { z3.h }, p0, [sp, #7, mul vl]
; CHECK-NEXT:    st1h { z2.h }, p0, [sp, #6, mul vl]
; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #5, mul vl]
; CHECK-NEXT:    st1h { z0.h }, p0, [sp, #4, mul vl]
; CHECK-NEXT:    ld1 { v0.h }[0], [x8]
; CHECK-NEXT:    addvl x8, sp, #4
; CHECK-NEXT:    add x8, x8, #34
; CHECK-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    addvl sp, sp, #8
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %ext = call <2 x i16> @llvm.vector.extract.v2i16.nxv32i16(<vscale x 32 x i16> %arg, i64 16)
  ret <2 x i16> %ext
}
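
; The two element extractions above each get their own spill of the full
; vector (hence the two copies at [sp] and [sp, #4, mul vl]): element 16 is
; loaded from byte 16 * 2 = 32 of the first copy and element 17 from byte 34
; of the second. They land in lanes h[0] and h[2] of v0, i.e. the low halves
; of the two lanes of the promoted <2 x i32> result.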

define <2 x i64> @extract_v2i64_nxv8i64_8(<vscale x 8 x i64> %arg) {
; CHECK-LABEL: extract_v2i64_nxv8i64_8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-4
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    cnth x8
; CHECK-NEXT:    mov w9, #8 // =0x8
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sub x8, x8, #2
; CHECK-NEXT:    cmp x8, #8
; CHECK-NEXT:    st1d { z3.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    csel x8, x8, x9, lo
; CHECK-NEXT:    st1d { z2.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    lsl x8, x8, #3
; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
; CHECK-NEXT:    ldr q0, [x9, x8]
; CHECK-NEXT:    addvl sp, sp, #4
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %ext = call <2 x i64> @llvm.vector.extract.v2i64.nxv8i64(<vscale x 8 x i64> %arg, i64 8)
  ret <2 x i64> %ext
}
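
; Index 8 is only valid for VL >= 256, so the offset is clamped at runtime:
; cnth equals the total number of i64 elements in an nxv8i64 (both are VL/16),
; and the csel picks min(8, count - 2) before scaling by 8 bytes, keeping the
; 16-byte `ldr q0` inside the spilled area.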

define <4 x float> @extract_v4f32_nxv16f32_12(<vscale x 16 x float> %arg) {
; CHECK-LABEL: extract_v4f32_nxv16f32_12:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-4
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    ldr q0, [sp, #48]
; CHECK-NEXT:    addvl sp, sp, #4
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %ext = call <4 x float> @llvm.vector.extract.v4f32.nxv16f32(<vscale x 16 x float> %arg, i64 12)
  ret <4 x float> %ext
}

define <2 x float> @extract_v2f32_nxv16f32_2(<vscale x 16 x float> %arg) {
; CHECK-LABEL: extract_v2f32_nxv16f32_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %ext = call <2 x float> @llvm.vector.extract.v2f32.nxv16f32(<vscale x 16 x float> %arg, i64 2)
  ret <2 x float> %ext
}
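
; No spill is needed here: elements 2 and 3 are always within the low 128 bits
; of z0, so an in-register `ext` by 2 * 4 = 8 bytes shifts them to the bottom
; and the result is simply read out as d0.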

define <4 x i1> @extract_v4i1_nxv32i1_0(<vscale x 32 x i1> %arg) {
; CHECK-LABEL: extract_v4i1_nxv32i1_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT:    umov w8, v1.b[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    umov w9, v1.b[2]
; CHECK-NEXT:    mov v0.h[1], w8
; CHECK-NEXT:    umov w8, v1.b[3]
; CHECK-NEXT:    mov v0.h[2], w9
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %ext = call <4 x i1> @llvm.vector.extract.v4i1.nxv32i1(<vscale x 32 x i1> %arg, i64 0)
  ret <4 x i1> %ext
}
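
; The predicate is first materialized as 0/1 bytes (mov z1.b, p0/z, #1); the
; first four byte lanes are then widened into the four halfword lanes of v0,
; matching the promotion of <4 x i1> to <4 x i16>.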

; The result type gets promoted, leading to us extracting 4 elements from an nxv32i16.
; Hence we don't end up in SplitVecOp_EXTRACT_SUBVECTOR, but in SplitVecOp_EXTRACT_VECTOR_ELT instead.
define <4 x i1> @extract_v4i1_nxv32i1_16(<vscale x 32 x i1> %arg) {
; CHECK-LABEL: extract_v4i1_nxv32i1_16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-8
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    ptrue p2.b
; CHECK-NEXT:    add x8, x8, #16
; CHECK-NEXT:    st1b { z0.b }, p2, [sp, #1, mul vl]
; CHECK-NEXT:    st1b { z1.b }, p2, [sp]
; CHECK-NEXT:    st1b { z0.b }, p2, [sp, #3, mul vl]
; CHECK-NEXT:    st1b { z1.b }, p2, [sp, #2, mul vl]
; CHECK-NEXT:    st1b { z0.b }, p2, [sp, #5, mul vl]
; CHECK-NEXT:    st1b { z1.b }, p2, [sp, #4, mul vl]
; CHECK-NEXT:    st1b { z0.b }, p2, [sp, #7, mul vl]
; CHECK-NEXT:    st1b { z1.b }, p2, [sp, #6, mul vl]
; CHECK-NEXT:    ld1 { v0.b }[0], [x8]
; CHECK-NEXT:    addvl x8, sp, #2
; CHECK-NEXT:    add x8, x8, #17
; CHECK-NEXT:    ld1 { v0.b }[2], [x8]
; CHECK-NEXT:    addvl x8, sp, #4
; CHECK-NEXT:    add x8, x8, #18
; CHECK-NEXT:    ld1 { v0.b }[4], [x8]
; CHECK-NEXT:    addvl x8, sp, #6
; CHECK-NEXT:    add x8, x8, #19
; CHECK-NEXT:    ld1 { v0.b }[6], [x8]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    addvl sp, sp, #8
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %ext = call <4 x i1> @llvm.vector.extract.v4i1.nxv32i1(<vscale x 32 x i1> %arg, i64 16)
  ret <4 x i1> %ext
}

define <4 x i1> @extract_v4i1_v32i1_16(<32 x i1> %arg) {
; CHECK-LABEL: extract_v4i1_v32i1_16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [sp, #64]
; CHECK-NEXT:    ldr w9, [sp, #72]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ldr w8, [sp, #80]
; CHECK-NEXT:    mov v0.h[1], w9
; CHECK-NEXT:    mov v0.h[2], w8
; CHECK-NEXT:    ldr w8, [sp, #88]
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %ext = call <4 x i1> @llvm.vector.extract.v4i1.v32i1(<32 x i1> %arg, i64 16)
  ret <4 x i1> %ext
}
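
; The fully fixed-length case needs no SVE at all: the <32 x i1> argument is
; passed element by element and, judging by the offsets, the first eight go in
; registers while each remaining element occupies an 8-byte stack slot, so
; elements 16-19 are loaded from the incoming argument area at [sp, #64]
; through [sp, #88] and packed into the promoted <4 x i16> result.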

; The result type gets promoted, leading to us extracting 4 elements from an nxv32i3.
; Hence we don't end up in SplitVecOp_EXTRACT_SUBVECTOR, but in SplitVecOp_EXTRACT_VECTOR_ELT instead.
define <4 x i3> @extract_v4i3_nxv32i3_16(<vscale x 32 x i3> %arg) {
; CHECK-LABEL: extract_v4i3_nxv32i3_16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-8
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    add x8, x8, #16
; CHECK-NEXT:    st1b { z1.b }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
; CHECK-NEXT:    st1b { z1.b }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1b { z0.b }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    st1b { z1.b }, p0, [sp, #5, mul vl]
; CHECK-NEXT:    st1b { z0.b }, p0, [sp, #4, mul vl]
; CHECK-NEXT:    st1b { z1.b }, p0, [sp, #7, mul vl]
; CHECK-NEXT:    st1b { z0.b }, p0, [sp, #6, mul vl]
; CHECK-NEXT:    ld1 { v0.b }[0], [x8]
; CHECK-NEXT:    addvl x8, sp, #2
; CHECK-NEXT:    add x8, x8, #17
; CHECK-NEXT:    ld1 { v0.b }[2], [x8]
; CHECK-NEXT:    addvl x8, sp, #4
; CHECK-NEXT:    add x8, x8, #18
; CHECK-NEXT:    ld1 { v0.b }[4], [x8]
; CHECK-NEXT:    addvl x8, sp, #6
; CHECK-NEXT:    add x8, x8, #19
; CHECK-NEXT:    ld1 { v0.b }[6], [x8]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    addvl sp, sp, #8
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %ext = call <4 x i3> @llvm.vector.extract.v4i3.nxv32i3(<vscale x 32 x i3> %arg, i64 16)
  ret <4 x i3> %ext
}

; Extracting an illegal fixed-length vector from an illegal scalable vector

define <2 x i32> @extract_v2i32_nxv16i32_2(<vscale x 16 x i32> %arg) {
; CHECK-LABEL: extract_v2i32_nxv16i32_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %ext = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32> %arg, i64 2)
  ret <2 x i32> %ext
}

define <4 x i64> @extract_v4i64_nxv8i64_0(<vscale x 8 x i64> %arg) {
; CHECK-LABEL: extract_v4i64_nxv8i64_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ldr q1, [sp, #16]
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %ext = call <4 x i64> @llvm.vector.extract.v4i64.nxv8i64(<vscale x 8 x i64> %arg, i64 0)
  ret <4 x i64> %ext
}
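
; The illegal <4 x i64> result is returned as two 128-bit halves: q0 is simply
; the low 128 bits of z0, while elements 2 and 3 are reloaded from byte offset
; 2 * 8 = 16 of the contiguous z0/z1 spill, which holds for any VL.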

define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
; CHECK-LABEL: extract_v4f16_nxv2f16_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 0)
  ret <4 x half> %ext
}
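
; An nxv2f16 keeps one half per 64-bit container, i.e. in h lanes 0, 4, 8, ...
; Each uzp1 halves that stride, so two of them pack the elements into
; contiguous h lanes and the low four can be returned as d0; the _4 variant
; below additionally shifts by 8 bytes with ext to start at element 4.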

define <4 x half> @extract_v4f16_nxv2f16_4(<vscale x 2 x half> %arg) {
; CHECK-LABEL: extract_v4f16_nxv2f16_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 4)
  ret <4 x half> %ext
}

define <2 x half> @extract_v2f16_nxv4f16_2(<vscale x 4 x half> %arg) {
; CHECK-LABEL: extract_v2f16_nxv4f16_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, z0.s[3]
; CHECK-NEXT:    mov z0.s, z0.s[2]
; CHECK-NEXT:    mov v0.h[1], v1.h[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %ext = call <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half> %arg, i64 2)
  ret <2 x half> %ext
}

define <2 x half> @extract_v2f16_nxv4f16_6(<vscale x 4 x half> %arg) {
; CHECK-LABEL: extract_v2f16_nxv4f16_6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, z0.s[7]
; CHECK-NEXT:    mov z0.s, z0.s[6]
; CHECK-NEXT:    mov v0.h[1], v1.h[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %ext = call <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half> %arg, i64 6)
  ret <2 x half> %ext
}
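
; For nxv4f16 the halves sit in 32-bit containers, so the two requested
; elements are selected with indexed s-lane dups and their low halves merged
; into lanes h[0] and h[1] of d0; the whole extract stays in registers.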

declare <4 x float> @llvm.vector.extract.v4f32.nxv16f32(<vscale x 16 x float>, i64)
declare <2 x float> @llvm.vector.extract.v2f32.nxv16f32(<vscale x 16 x float>, i64)
declare <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half>, i64)
declare <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half>, i64)
declare <2 x i64> @llvm.vector.extract.v2i64.nxv8i64(<vscale x 8 x i64>, i64)
declare <4 x i64> @llvm.vector.extract.v4i64.nxv8i64(<vscale x 8 x i64>, i64)
declare <4 x i32> @llvm.vector.extract.v4i32.nxv16i32(<vscale x 16 x i32>, i64)
declare <2 x i32> @llvm.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32>, i64)
declare <8 x i16> @llvm.vector.extract.v8i16.nxv32i16(<vscale x 32 x i16>, i64)
declare <4 x i16> @llvm.vector.extract.v4i16.nxv32i16(<vscale x 32 x i16>, i64)
declare <2 x i16> @llvm.vector.extract.v2i16.nxv32i16(<vscale x 32 x i16>, i64)
declare <4 x i1> @llvm.vector.extract.v4i1.nxv32i1(<vscale x 32 x i1>, i64)
declare <4 x i1> @llvm.vector.extract.v4i1.v32i1(<32 x i1>, i64)
declare <4 x i3> @llvm.vector.extract.v4i3.nxv32i3(<vscale x 32 x i3>, i64)