; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32NOM
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32M
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64NOM
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64M

; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32M
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64M
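
; The check prefixes select between RV32/RV64 targets, between configurations
; with and without the scalar M extension (RV32M/RV64M vs RV32NOM/RV64NOM),
; and between full Zvfh and conversion-only Zvfhmin half-precision vector
; support (ZVFH vs ZVFHMIN).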

define i8 @extractelt_v16i8(<16 x i8> %a) nounwind {
; CHECK-LABEL: extractelt_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <16 x i8> %a, i32 7
  ret i8 %b
}

define i16 @extractelt_v8i16(<8 x i16> %a) nounwind {
; CHECK-LABEL: extractelt_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <8 x i16> %a, i32 7
  ret i16 %b
}

define i32 @extractelt_v4i32(<4 x i32> %a) nounwind {
; CHECK-LABEL: extractelt_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <4 x i32> %a, i32 2
  ret i32 %b
}

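; On RV32, i64 is not a legal scalar type, so the extract is split into two
; 32-bit halves: the low half is read directly with vmv.x.s and the high half
; after a 32-bit vector shift right (vsrl.vx).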
define i64 @extractelt_v2i64(<2 x i64> %a) nounwind {
; RV32-LABEL: extractelt_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %b = extractelement <2 x i64> %a, i32 0
  ret i64 %b
}

define bfloat @extractelt_v8bf16(<8 x bfloat> %a) nounwind {
; CHECK-LABEL: extractelt_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    fmv.h.x fa0, a0
; CHECK-NEXT:    ret
  %b = extractelement <8 x bfloat> %a, i32 7
  ret bfloat %b
}

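; With only Zvfhmin, the half element goes through a GPR (vmv.x.s followed by
; fmv.h.x from Zfhmin) rather than being read directly with vfmv.f.s as in the
; Zvfh configuration.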
define half @extractelt_v8f16(<8 x half> %a) nounwind {
; ZVFH-LABEL: extractelt_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; ZVFH-NEXT:    vslidedown.vi v8, v8, 7
; ZVFH-NEXT:    vfmv.f.s fa0, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: extractelt_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 7
; ZVFHMIN-NEXT:    vmv.x.s a0, v8
; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
; ZVFHMIN-NEXT:    ret
  %b = extractelement <8 x half> %a, i32 7
  ret half %b
}

define float @extractelt_v4f32(<4 x float> %a) nounwind {
; CHECK-LABEL: extractelt_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = extractelement <4 x float> %a, i32 2
  ret float %b
}

define double @extractelt_v2f64(<2 x double> %a) nounwind {
; CHECK-LABEL: extractelt_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = extractelement <2 x double> %a, i32 0
  ret double %b
}

define i8 @extractelt_v32i8(<32 x i8> %a) nounwind {
; CHECK-LABEL: extractelt_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <32 x i8> %a, i32 7
  ret i8 %b
}

define i16 @extractelt_v16i16(<16 x i16> %a) nounwind {
; CHECK-LABEL: extractelt_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <16 x i16> %a, i32 7
  ret i16 %b
}

define i32 @extractelt_v8i32(<8 x i32> %a) nounwind {
; CHECK-LABEL: extractelt_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 6
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <8 x i32> %a, i32 6
  ret i32 %b
}

define i64 @extractelt_v4i64(<4 x i64> %a) nounwind {
; RV32-LABEL: extractelt_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %b = extractelement <4 x i64> %a, i32 3
  ret i64 %b
}

define bfloat @extractelt_v16bf16(<16 x bfloat> %a) nounwind {
; CHECK-LABEL: extractelt_v16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    fmv.h.x fa0, a0
; CHECK-NEXT:    ret
  %b = extractelement <16 x bfloat> %a, i32 7
  ret bfloat %b
}

define half @extractelt_v16f16(<16 x half> %a) nounwind {
; ZVFH-LABEL: extractelt_v16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; ZVFH-NEXT:    vslidedown.vi v8, v8, 7
; ZVFH-NEXT:    vfmv.f.s fa0, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: extractelt_v16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 7
; ZVFHMIN-NEXT:    vmv.x.s a0, v8
; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
; ZVFHMIN-NEXT:    ret
  %b = extractelement <16 x half> %a, i32 7
  ret half %b
}

define float @extractelt_v8f32(<8 x float> %a) nounwind {
; CHECK-LABEL: extractelt_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = extractelement <8 x float> %a, i32 2
  ret float %b
}

define double @extractelt_v4f64(<4 x double> %a) nounwind {
; CHECK-LABEL: extractelt_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = extractelement <4 x double> %a, i32 0
  ret double %b
}

; This uses a non-power-of-2 type so that it isn't an MVT, to catch an
; incorrect use of getSimpleValueType().
; NOTE: Type legalization is bitcasting to vXi32 and doing 2 independent
; slidedowns and extracts.
define i64 @extractelt_v3i64(<3 x i64> %a) nounwind {
; RV32-LABEL: extractelt_v3i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vslidedown.vi v8, v8, 5
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v3i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 2
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %b = extractelement <3 x i64> %a, i32 2
  ret i64 %b
}

; An LMUL8 type
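; Extracting the last element of an LMUL8 vector goes through the stack: the
; whole vector is stored to a 128-byte-aligned slot and the lane is reloaded
; with a scalar lw.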
define i32 @extractelt_v32i32(<32 x i32> %a) nounwind {
; RV32-LABEL: extractelt_v32i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -256
; RV32-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT:    addi s0, sp, 256
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT:    vse32.v v8, (a1)
; RV32-NEXT:    lw a0, 124(sp)
; RV32-NEXT:    addi sp, s0, -256
; RV32-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 256
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v32i32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    mv a1, sp
; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT:    vse32.v v8, (a1)
; RV64-NEXT:    lw a0, 124(sp)
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    ret
  %b = extractelement <32 x i32> %a, i32 31
  ret i32 %b
}

; Exercise type legalization for a type beyond LMUL8
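; The <64 x i32> operand is split into two LMUL8 halves (v8 and v16); element
; 63 lives in the upper half, so only v16 needs to be stored and reloaded.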
define i32 @extractelt_v64i32(<64 x i32> %a) nounwind {
; RV32-LABEL: extractelt_v64i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -256
; RV32-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT:    addi s0, sp, 256
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT:    vse32.v v16, (a1)
; RV32-NEXT:    lw a0, 124(sp)
; RV32-NEXT:    addi sp, s0, -256
; RV32-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 256
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v64i32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    mv a1, sp
; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT:    vse32.v v16, (a1)
; RV64-NEXT:    lw a0, 124(sp)
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    ret
  %b = extractelement <64 x i32> %a, i32 63
  ret i32 %b
}

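; Variable-index extracts slide the requested lane down to element 0 with
; vslidedown.vx and then read it with vmv.x.s / vfmv.f.s.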
define i8 @extractelt_v16i8_idx(<16 x i8> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i8_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <16 x i8> %a, i32 %idx
  ret i8 %b
}

define i16 @extractelt_v8i16_idx(<8 x i16> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8i16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <8 x i16> %a, i32 %idx
  ret i16 %b
}

define i32 @extractelt_v4i32_idx(<4 x i32> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4i32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = add <4 x i32> %a, %a
  %c = extractelement <4 x i32> %b, i32 %idx
  ret i32 %c
}

define i64 @extractelt_v2i64_idx(<2 x i64> %a, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v2i64_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vslidedown.vx v8, v8, a0
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v2i64_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vslidedown.vx v8, v8, a0
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %b = add <2 x i64> %a, %a
  %c = extractelement <2 x i64> %b, i32 %idx
  ret i64 %c
}

define bfloat @extractelt_v8bf16_idx(<8 x bfloat> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8bf16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT:    vslidedown.vx v8, v10, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    fmv.h.x fa0, a0
; CHECK-NEXT:    ret
  %b = fadd <8 x bfloat> %a, %a
  %c = extractelement <8 x bfloat> %b, i32 %idx
  ret bfloat %c
}

define half @extractelt_v8f16_idx(<8 x half> %a, i32 zeroext %idx) nounwind {
; ZVFH-LABEL: extractelt_v8f16_idx:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfadd.vv v8, v8, v8
; ZVFH-NEXT:    vslidedown.vx v8, v8, a0
; ZVFH-NEXT:    vfmv.f.s fa0, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: extractelt_v8f16_idx:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vslidedown.vx v8, v10, a0
; ZVFHMIN-NEXT:    vmv.x.s a0, v8
; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
; ZVFHMIN-NEXT:    ret
  %b = fadd <8 x half> %a, %a
  %c = extractelement <8 x half> %b, i32 %idx
  ret half %c
}

define float @extractelt_v4f32_idx(<4 x float> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4f32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = fadd <4 x float> %a, %a
  %c = extractelement <4 x float> %b, i32 %idx
  ret float %c
}

define double @extractelt_v2f64_idx(<2 x double> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v2f64_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = fadd <2 x double> %a, %a
  %c = extractelement <2 x double> %b, i32 %idx
  ret double %c
}

define i8 @extractelt_v32i8_idx(<32 x i8> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v32i8_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <32 x i8> %a, i32 %idx
  ret i8 %b
}

define i16 @extractelt_v16i16_idx(<16 x i16> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <16 x i16> %a, i32 %idx
  ret i16 %b
}

define i32 @extractelt_v8i32_idx(<8 x i32> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8i32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = add <8 x i32> %a, %a
  %c = extractelement <8 x i32> %b, i32 %idx
  ret i32 %c
}

define i64 @extractelt_v4i64_idx(<4 x i64> %a, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v4i64_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vslidedown.vx v8, v8, a0
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v4i64_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vslidedown.vx v8, v8, a0
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %b = add <4 x i64> %a, %a
  %c = extractelement <4 x i64> %b, i32 %idx
  ret i64 %c
}

define bfloat @extractelt_v16bf16_idx(<16 x bfloat> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16bf16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v12, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    fmv.h.x fa0, a0
; CHECK-NEXT:    ret
  %b = fadd <16 x bfloat> %a, %a
  %c = extractelement <16 x bfloat> %b, i32 %idx
  ret bfloat %c
}

define half @extractelt_v16f16_idx(<16 x half> %a, i32 zeroext %idx) nounwind {
; ZVFH-LABEL: extractelt_v16f16_idx:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFH-NEXT:    vfadd.vv v8, v8, v8
; ZVFH-NEXT:    vslidedown.vx v8, v8, a0
; ZVFH-NEXT:    vfmv.f.s fa0, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: extractelt_v16f16_idx:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
; ZVFHMIN-NEXT:    vslidedown.vx v8, v12, a0
; ZVFHMIN-NEXT:    vmv.x.s a0, v8
; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
; ZVFHMIN-NEXT:    ret
  %b = fadd <16 x half> %a, %a
  %c = extractelement <16 x half> %b, i32 %idx
  ret half %c
}

define float @extractelt_v8f32_idx(<8 x float> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8f32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = fadd <8 x float> %a, %a
  %c = extractelement <8 x float> %b, i32 %idx
  ret float %c
}

define double @extractelt_v4f64_idx(<4 x double> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4f64_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = fadd <4 x double> %a, %a
  %c = extractelement <4 x double> %b, i32 %idx
  ret double %c
}

; This uses a non-power-of-2 type so that it isn't an MVT, to catch an
; incorrect use of getSimpleValueType().
; NOTE: Type legalization is bitcasting to vXi32 and doing 2 independent
; slidedowns and extracts.
define i64 @extractelt_v3i64_idx(<3 x i64> %a, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v3i64_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    add a0, a0, a0
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vx v10, v8, a0
; RV32-NEXT:    addi a1, a0, 1
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vslidedown.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v3i64_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vslidedown.vx v8, v8, a0
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %b = add <3 x i64> %a, %a
  %c = extractelement <3 x i64> %b, i32 %idx
  ret i64 %c
}

define i32 @extractelt_v32i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32NOM-LABEL: extractelt_v32i32_idx:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    addi sp, sp, -256
; RV32NOM-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32NOM-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32NOM-NEXT:    sw s2, 244(sp) # 4-byte Folded Spill
; RV32NOM-NEXT:    addi s0, sp, 256
; RV32NOM-NEXT:    andi sp, sp, -128
; RV32NOM-NEXT:    mv s2, a0
; RV32NOM-NEXT:    andi a0, a1, 31
; RV32NOM-NEXT:    li a1, 4
; RV32NOM-NEXT:    call __mulsi3
; RV32NOM-NEXT:    li a1, 32
; RV32NOM-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV32NOM-NEXT:    vle32.v v8, (s2)
; RV32NOM-NEXT:    mv a1, sp
; RV32NOM-NEXT:    add a0, a1, a0
; RV32NOM-NEXT:    vadd.vv v8, v8, v8
; RV32NOM-NEXT:    vse32.v v8, (a1)
; RV32NOM-NEXT:    lw a0, 0(a0)
; RV32NOM-NEXT:    addi sp, s0, -256
; RV32NOM-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32NOM-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32NOM-NEXT:    lw s2, 244(sp) # 4-byte Folded Reload
; RV32NOM-NEXT:    addi sp, sp, 256
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_v32i32_idx:
; RV32M:       # %bb.0:
; RV32M-NEXT:    addi sp, sp, -256
; RV32M-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32M-NEXT:    addi s0, sp, 256
; RV32M-NEXT:    andi sp, sp, -128
; RV32M-NEXT:    andi a1, a1, 31
; RV32M-NEXT:    li a2, 32
; RV32M-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32M-NEXT:    vle32.v v8, (a0)
; RV32M-NEXT:    slli a1, a1, 2
; RV32M-NEXT:    mv a0, sp
; RV32M-NEXT:    or a1, a0, a1
; RV32M-NEXT:    vadd.vv v8, v8, v8
; RV32M-NEXT:    vse32.v v8, (a0)
; RV32M-NEXT:    lw a0, 0(a1)
; RV32M-NEXT:    addi sp, s0, -256
; RV32M-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32M-NEXT:    addi sp, sp, 256
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_v32i32_idx:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    addi sp, sp, -256
; RV64NOM-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64NOM-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64NOM-NEXT:    sd s2, 232(sp) # 8-byte Folded Spill
; RV64NOM-NEXT:    addi s0, sp, 256
; RV64NOM-NEXT:    andi sp, sp, -128
; RV64NOM-NEXT:    mv s2, a0
; RV64NOM-NEXT:    andi a0, a1, 31
; RV64NOM-NEXT:    li a1, 4
; RV64NOM-NEXT:    call __muldi3
; RV64NOM-NEXT:    li a1, 32
; RV64NOM-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV64NOM-NEXT:    vle32.v v8, (s2)
; RV64NOM-NEXT:    mv a1, sp
; RV64NOM-NEXT:    add a0, a1, a0
; RV64NOM-NEXT:    vadd.vv v8, v8, v8
; RV64NOM-NEXT:    vse32.v v8, (a1)
; RV64NOM-NEXT:    lw a0, 0(a0)
; RV64NOM-NEXT:    addi sp, s0, -256
; RV64NOM-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64NOM-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64NOM-NEXT:    ld s2, 232(sp) # 8-byte Folded Reload
; RV64NOM-NEXT:    addi sp, sp, 256
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_v32i32_idx:
; RV64M:       # %bb.0:
; RV64M-NEXT:    addi sp, sp, -256
; RV64M-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64M-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64M-NEXT:    addi s0, sp, 256
; RV64M-NEXT:    andi sp, sp, -128
; RV64M-NEXT:    andi a1, a1, 31
; RV64M-NEXT:    li a2, 32
; RV64M-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV64M-NEXT:    vle32.v v8, (a0)
; RV64M-NEXT:    slli a1, a1, 2
; RV64M-NEXT:    mv a0, sp
; RV64M-NEXT:    or a1, a0, a1
; RV64M-NEXT:    vadd.vv v8, v8, v8
; RV64M-NEXT:    vse32.v v8, (a0)
; RV64M-NEXT:    lw a0, 0(a1)
; RV64M-NEXT:    addi sp, s0, -256
; RV64M-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64M-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64M-NEXT:    addi sp, sp, 256
; RV64M-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = add <32 x i32> %a, %a
  %c = extractelement <32 x i32> %b, i32 %idx
  ret i32 %c
}

define i32 @extractelt_v64i32_idx(<64 x i32> %a, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v64i32_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -384
; RV32-NEXT:    sw ra, 380(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 376(sp) # 4-byte Folded Spill
; RV32-NEXT:    addi s0, sp, 384
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    andi a0, a0, 63
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    addi a3, sp, 128
; RV32-NEXT:    slli a0, a0, 2
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vadd.vv v16, v16, v16
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    vse32.v v16, (a3)
; RV32-NEXT:    vse32.v v8, (a1)
; RV32-NEXT:    lw a0, 0(a0)
; RV32-NEXT:    addi sp, s0, -384
; RV32-NEXT:    lw ra, 380(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 376(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 384
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v64i32_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -384
; RV64-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi s0, sp, 384
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    andi a0, a0, 63
; RV64-NEXT:    mv a1, sp
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    addi a3, sp, 128
; RV64-NEXT:    slli a0, a0, 2
; RV64-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    vse32.v v16, (a3)
; RV64-NEXT:    vse32.v v8, (a1)
; RV64-NEXT:    lw a0, 0(a0)
; RV64-NEXT:    addi sp, s0, -384
; RV64-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 384
; RV64-NEXT:    ret
  %b = add <64 x i32> %a, %a
  %c = extractelement <64 x i32> %b, i32 %idx
  ret i32 %c
}

define void @store_extractelt_v16i8(<16 x i8> %a, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %b = extractelement <16 x i8> %a, i32 7
  store i8 %b, ptr %p
  ret void
}

define void @store_extractelt_v8i16(<8 x i16> %a, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %b = extractelement <8 x i16> %a, i32 7
  store i16 %b, ptr %p
  ret void
}

define void @store_extractelt_v4i32(<4 x i32> %a, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %b = extractelement <4 x i32> %a, i32 2
  store i32 %b, ptr %p
  ret void
}

; FIXME: Use vse64.v on RV32 to avoid two scalar extracts and two scalar stores.
define void @store_extractelt_v2i64(<2 x i64> %a, ptr %p) nounwind {
; RV32-LABEL: store_extractelt_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsrl.vx v9, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    vmv.x.s a2, v9
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: store_extractelt_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %b = extractelement <2 x i64> %a, i64 1
  store i64 %b, ptr %p
  ret void
}

define void @store_extractelt_v2f64(<2 x double> %a, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %b = extractelement <2 x double> %a, i64 1
  store double %b, ptr %p
  ret void
}

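; The tests below exercise a binary op whose only use is a single extract.
; When the scalar form is as cheap (add, sub, and mul/div when the M extension
; is available), the op is scalarized after the extract; otherwise it stays in
; vector form (e.g. vmul.vx without M).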
define i32 @extractelt_add_v4i32(<4 x i32> %x) {
; RV32-LABEL: extractelt_add_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 2
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    addi a0, a0, 13
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_add_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 2
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    addiw a0, a0, 13
; RV64-NEXT:    ret
  %bo = add <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_sub_v4i32(<4 x i32> %x) {
; RV32-LABEL: extractelt_sub_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 2
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    li a1, 13
; RV32-NEXT:    sub a0, a1, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_sub_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 2
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    li a1, 13
; RV64-NEXT:    subw a0, a1, a0
; RV64-NEXT:    ret
  %bo = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %x
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_mul_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_mul_v4i32:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    li a0, 13
; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT:    vmul.vx v8, v8, a0
; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV32NOM-NEXT:    vmv.x.s a0, v8
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_mul_v4i32:
; RV32M:       # %bb.0:
; RV32M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32M-NEXT:    vslidedown.vi v8, v8, 2
; RV32M-NEXT:    vmv.x.s a0, v8
; RV32M-NEXT:    li a1, 13
; RV32M-NEXT:    mul a0, a0, a1
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_mul_v4i32:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    li a0, 13
; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT:    vmul.vx v8, v8, a0
; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV64NOM-NEXT:    vmv.x.s a0, v8
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_mul_v4i32:
; RV64M:       # %bb.0:
; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT:    vslidedown.vi v8, v8, 2
; RV64M-NEXT:    vmv.x.s a0, v8
; RV64M-NEXT:    li a1, 13
; RV64M-NEXT:    mulw a0, a0, a1
; RV64M-NEXT:    ret
  %bo = mul <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_sdiv_v4i32:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    lui a0, %hi(.LCPI46_0)
; RV32NOM-NEXT:    addi a0, a0, %lo(.LCPI46_0)
; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT:    vle32.v v9, (a0)
; RV32NOM-NEXT:    lui a0, 1044480
; RV32NOM-NEXT:    vmv.s.x v10, a0
; RV32NOM-NEXT:    lui a0, 12320
; RV32NOM-NEXT:    addi a0, a0, 257
; RV32NOM-NEXT:    vsext.vf4 v11, v10
; RV32NOM-NEXT:    vand.vv v10, v8, v11
; RV32NOM-NEXT:    vmulh.vv v8, v8, v9
; RV32NOM-NEXT:    vmv.s.x v9, a0
; RV32NOM-NEXT:    vsext.vf4 v11, v9
; RV32NOM-NEXT:    vadd.vv v8, v8, v10
; RV32NOM-NEXT:    vsra.vv v9, v8, v11
; RV32NOM-NEXT:    vsrl.vi v8, v8, 31
; RV32NOM-NEXT:    vadd.vv v8, v9, v8
; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV32NOM-NEXT:    vmv.x.s a0, v8
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_sdiv_v4i32:
; RV32M:       # %bb.0:
; RV32M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32M-NEXT:    vslidedown.vi v8, v8, 2
; RV32M-NEXT:    lui a0, 322639
; RV32M-NEXT:    vmv.x.s a1, v8
; RV32M-NEXT:    addi a0, a0, -945
; RV32M-NEXT:    mulh a0, a1, a0
; RV32M-NEXT:    srli a1, a0, 31
; RV32M-NEXT:    srai a0, a0, 2
; RV32M-NEXT:    add a0, a0, a1
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_sdiv_v4i32:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    lui a0, %hi(.LCPI46_0)
; RV64NOM-NEXT:    addi a0, a0, %lo(.LCPI46_0)
; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT:    vle32.v v9, (a0)
; RV64NOM-NEXT:    lui a0, 1044480
; RV64NOM-NEXT:    vmv.s.x v10, a0
; RV64NOM-NEXT:    lui a0, 12320
; RV64NOM-NEXT:    addi a0, a0, 257
; RV64NOM-NEXT:    vsext.vf4 v11, v10
; RV64NOM-NEXT:    vand.vv v10, v8, v11
; RV64NOM-NEXT:    vmulh.vv v8, v8, v9
; RV64NOM-NEXT:    vmv.s.x v9, a0
; RV64NOM-NEXT:    vadd.vv v8, v8, v10
; RV64NOM-NEXT:    vsext.vf4 v10, v9
; RV64NOM-NEXT:    vsra.vv v8, v8, v10
; RV64NOM-NEXT:    vsrl.vi v9, v8, 31
; RV64NOM-NEXT:    vadd.vv v8, v8, v9
; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV64NOM-NEXT:    vmv.x.s a0, v8
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_sdiv_v4i32:
; RV64M:       # %bb.0:
; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT:    vslidedown.vi v8, v8, 2
; RV64M-NEXT:    lui a0, 322639
; RV64M-NEXT:    vmv.x.s a1, v8
; RV64M-NEXT:    addiw a0, a0, -945
; RV64M-NEXT:    mul a0, a1, a0
; RV64M-NEXT:    srli a1, a0, 63
; RV64M-NEXT:    srai a0, a0, 34
; RV64M-NEXT:    add a0, a0, a1
; RV64M-NEXT:    ret
  %bo = sdiv <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_udiv_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_udiv_v4i32:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT:    vsrl.vi v8, v8, 0
; RV32NOM-NEXT:    lui a0, 322639
; RV32NOM-NEXT:    addi a0, a0, -945
; RV32NOM-NEXT:    vmulhu.vx v8, v8, a0
; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV32NOM-NEXT:    vmv.x.s a0, v8
; RV32NOM-NEXT:    srli a0, a0, 2
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_udiv_v4i32:
; RV32M:       # %bb.0:
; RV32M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32M-NEXT:    vslidedown.vi v8, v8, 2
; RV32M-NEXT:    lui a0, 322639
; RV32M-NEXT:    vmv.x.s a1, v8
; RV32M-NEXT:    addi a0, a0, -945
; RV32M-NEXT:    mulhu a0, a1, a0
; RV32M-NEXT:    srli a0, a0, 2
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_udiv_v4i32:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT:    vsrl.vi v8, v8, 0
; RV64NOM-NEXT:    lui a0, 322639
; RV64NOM-NEXT:    addi a0, a0, -945
; RV64NOM-NEXT:    vmulhu.vx v8, v8, a0
; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV64NOM-NEXT:    vmv.x.s a0, v8
; RV64NOM-NEXT:    slli a0, a0, 33
; RV64NOM-NEXT:    srli a0, a0, 35
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_udiv_v4i32:
; RV64M:       # %bb.0:
; RV64M-NEXT:    lui a0, 322639
; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT:    vslidedown.vi v8, v8, 2
; RV64M-NEXT:    addi a0, a0, -945
; RV64M-NEXT:    vmv.x.s a1, v8
; RV64M-NEXT:    slli a0, a0, 32
; RV64M-NEXT:    slli a1, a1, 32
; RV64M-NEXT:    mulhu a0, a1, a0
; RV64M-NEXT:    srli a0, a0, 34
; RV64M-NEXT:    ret
  %bo = udiv <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define float @extractelt_fadd_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fadd.s fa0, fa5, fa4
; CHECK-NEXT:    ret
  %bo = fadd <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define float @extractelt_fsub_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fsub_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fsub.s fa0, fa4, fa5
; CHECK-NEXT:    ret
  %bo = fsub <4 x float> <float 11.0, float 12.0, float 13.0, float 14.0>, %x
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define float @extractelt_fmul_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fmul_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fmul.s fa0, fa5, fa4
; CHECK-NEXT:    ret
  %bo = fmul <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define float @extractelt_fdiv_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fdiv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fdiv.s fa0, fa5, fa4
; CHECK-NEXT:    ret
  %bo = fdiv <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

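; With an exact VLEN of 128 (vscale_range(2,2)), the register holding the
; requested lane is known at compile time, so the extract can operate at m1
; directly on the relevant sub-register (v9 for lane 7, v11 for lane 15).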
define i32 @extractelt_v16i32_idx7_exact_vlen(<16 x i32> %a) nounwind vscale_range(2,2) {
; CHECK-LABEL: extractelt_v16i32_idx7_exact_vlen:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v9, 3
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <16 x i32> %a, i32 7
  ret i32 %b
}

define i32 @extractelt_v16i32_idx15_exact_vlen(<16 x i32> %a) nounwind vscale_range(2,2) {
; CHECK-LABEL: extractelt_v16i32_idx15_exact_vlen:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v11, 3
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <16 x i32> %a, i32 15
  ret i32 %b
}