xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll (revision 088db868f3370ffe01c9750f75732679efecd1fe)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
3; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
4; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
5; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
6
; Two-source <4 x bfloat> shuffle: lane 2 comes from %y (mask index 6) and
; lanes 0, 1 and 3 come from %x. The expected lowering is a single vmerge.vvm
; whose mask constant is 11 (0b1011), matching the three lanes sourced from %x.
7define <4 x bfloat> @shuffle_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
8; CHECK-LABEL: shuffle_v4bf16:
9; CHECK:       # %bb.0:
10; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
11; CHECK-NEXT:    vmv.v.i v0, 11
12; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
13; CHECK-NEXT:    ret
14  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
15  ret <4 x bfloat> %s
16}
17
; <4 x half> counterpart of the bfloat select-style shuffle: lane 2 comes from
; %y, the other lanes from %x. The expected output is identical for the zvfh
; and zvfhmin configurations: one vmerge.vvm with mask constant 11 (0b1011).
18define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
19; CHECK-LABEL: shuffle_v4f16:
20; CHECK:       # %bb.0:
21; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
22; CHECK-NEXT:    vmv.v.i v0, 11
23; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
24; CHECK-NEXT:    ret
25  %s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
26  ret <4 x half> %s
27}
28
; Two-source <8 x float> shuffle in which every lane keeps its own position:
; lanes 0, 1 and 4 come from %y, lanes 2, 3, 5, 6 and 7 from %x. The mask is
; materialized from the scalar -20 (low byte 0b11101100, i.e. the %x lanes)
; and the expected lowering is a single vmerge.vvm.
29define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x float> %y) {
30; CHECK-LABEL: shuffle_v8f32:
31; CHECK:       # %bb.0:
32; CHECK-NEXT:    li a0, -20
33; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
34; CHECK-NEXT:    vmv.s.x v0, a0
35; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
36; CHECK-NEXT:    ret
37  %s = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7>
38  ret <8 x float> %s
39}
40
; Select-style shuffle with a constant splat (2.0) as the FIRST operand and %x
; as the second: lanes 0 and 3 take the splat, lanes 1 and 2 take %x. The
; expected lowering loads the scalar from the constant pool and uses a single
; vfmerge.vfm with mask 9 (0b1001), the lanes that receive the scalar.
41define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) {
42; CHECK-LABEL: shuffle_fv_v4f64:
43; CHECK:       # %bb.0:
44; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
45; CHECK-NEXT:    fld fa5, %lo(.LCPI3_0)(a0)
46; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
47; CHECK-NEXT:    vmv.v.i v0, 9
48; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
49; CHECK-NEXT:    vfmerge.vfm v8, v8, fa5, v0
50; CHECK-NEXT:    ret
51  %s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
52  ret <4 x double> %s
53}
54
; Mirror of the splat-first variant: %x is the first operand and the constant
; splat (2.0) the SECOND. Lanes 0 and 3 keep %x, lanes 1 and 2 take the splat,
; so the expected vfmerge.vfm mask is 6 (0b0110) instead of 9.
55define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
56; CHECK-LABEL: shuffle_vf_v4f64:
57; CHECK:       # %bb.0:
58; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
59; CHECK-NEXT:    fld fa5, %lo(.LCPI4_0)(a0)
60; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
61; CHECK-NEXT:    vmv.v.i v0, 6
62; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
63; CHECK-NEXT:    vfmerge.vfm v8, v8, fa5, v0
64; CHECK-NEXT:    ret
65  %s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
66  ret <4 x double> %s
67}
68
; Single-source permute <1, 2, 0, 1> of %x (second operand is poison). The
; expected lowering builds the index vector from one scalar: lui 4096 + addi 513
; gives 0x01000201, whose bytes (low to high) are 1, 2, 0, 1. Those bytes are
; sign-extended to 16-bit indices and consumed by vrgatherei16.vv.
69define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
70; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f64:
71; CHECK:       # %bb.0:
72; CHECK-NEXT:    lui a0, 4096
73; CHECK-NEXT:    addi a0, a0, 513
74; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
75; CHECK-NEXT:    vmv.s.x v10, a0
76; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
77; CHECK-NEXT:    vsext.vf2 v12, v10
78; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
79; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
80; CHECK-NEXT:    vmv.v.v v8, v10
81; CHECK-NEXT:    ret
82  %s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
83  ret <4 x double> %s
84}
85
; Same permute as the "vu" variant but with the operands swapped: the first
; operand is poison and %x is the second, so the mask <5, 6, 4, 5> selects
; elements 1, 2, 0, 1 of %x. The expected instruction sequence is identical to
; the "vu" variant's (same packed index constant, same vrgatherei16.vv).
86define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
87; CHECK-LABEL: vrgather_permute_shuffle_uv_v4f64:
88; CHECK:       # %bb.0:
89; CHECK-NEXT:    lui a0, 4096
90; CHECK-NEXT:    addi a0, a0, 513
91; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
92; CHECK-NEXT:    vmv.s.x v10, a0
93; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
94; CHECK-NEXT:    vsext.vf2 v12, v10
95; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
96; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
97; CHECK-NEXT:    vmv.v.v v8, v10
98; CHECK-NEXT:    ret
99  %s = shufflevector <4 x double> poison, <4 x double> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
100  ret <4 x double> %s
101}
102
; Two-source permute <1, 2, 0, 5>: lanes 0-2 read elements 1, 2, 0 of %x and
; lane 3 reads element 1 of %y. The expected lowering is an unmasked
; vrgatherei16.vv with indices loaded from the constant pool, followed by a
; masked vrgather.vi (mask 8 = lane 3 only, hence the "mu" policy) for %y.
103define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
104; CHECK-LABEL: vrgather_shuffle_vv_v4f64:
105; CHECK:       # %bb.0:
106; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
107; CHECK-NEXT:    addi a0, a0, %lo(.LCPI7_0)
108; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
109; CHECK-NEXT:    vle16.v v14, (a0)
110; CHECK-NEXT:    vmv.v.i v0, 8
111; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
112; CHECK-NEXT:    vrgatherei16.vv v12, v8, v14
113; CHECK-NEXT:    vrgather.vi v12, v10, 1, v0.t
114; CHECK-NEXT:    vmv.v.v v8, v12
115; CHECK-NEXT:    ret
116  %s = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
117  ret <4 x double> %s
118}
119
; Constant splat (2.0) as first operand, %x as second, mask <0, 3, 6, 5>:
; lanes 0 and 1 take the splat, lanes 2 and 3 take elements 2 and 1 of %x.
; The expected lowering splats the scalar with vfmv.v.f and overwrites lanes
; 2-3 (mask 12) with a masked vrgatherei16.vv whose indices are computed as
; 4 - vid (vid.v followed by vrsub.vi ..., 4).
120define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
121; CHECK-LABEL: vrgather_shuffle_xv_v4f64:
122; CHECK:       # %bb.0:
123; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
124; CHECK-NEXT:    fld fa5, %lo(.LCPI8_0)(a0)
125; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
126; CHECK-NEXT:    vid.v v10
127; CHECK-NEXT:    vrsub.vi v12, v10, 4
128; CHECK-NEXT:    vmv.v.i v0, 12
129; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
130; CHECK-NEXT:    vfmv.v.f v10, fa5
131; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12, v0.t
132; CHECK-NEXT:    vmv.v.v v8, v10
133; CHECK-NEXT:    ret
134  %s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
135  ret <4 x double> %s
136}
137
; %x as first operand, constant splat (2.0) as second, mask <0, 3, 6, 5>:
; lanes 0 and 1 take elements 0 and 3 of %x, lanes 2 and 3 take the splat.
; The expected lowering packs elements 0 and 3 with vcompress.vm (mask 9),
; splats the scalar with vfmv.v.f, then merges the packed pair into lanes 0-1
; (mask 3) with vmerge.vvm.
138define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
139; CHECK-LABEL: vrgather_shuffle_vx_v4f64:
140; CHECK:       # %bb.0:
141; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
142; CHECK-NEXT:    fld fa5, %lo(.LCPI9_0)(a0)
143; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
144; CHECK-NEXT:    vmv.v.i v10, 9
145; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
146; CHECK-NEXT:    vcompress.vm v12, v8, v10
147; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
148; CHECK-NEXT:    vmv.v.i v0, 3
149; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
150; CHECK-NEXT:    vfmv.v.f v8, fa5
151; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
152; CHECK-NEXT:    ret
153  %s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
154  ret <4 x double> %s
155}
156
; Length-changing shuffle: extracts the 4 contiguous elements 1..4 of an
; <8 x bfloat> input. The expected lowering is a single vslidedown.vi by 1
; executed at the source vector length (VL = 8).
157define <4 x bfloat> @shuffle_v8bf16_to_vslidedown_1(<8 x bfloat> %x) {
158; CHECK-LABEL: shuffle_v8bf16_to_vslidedown_1:
159; CHECK:       # %bb.0: # %entry
160; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
161; CHECK-NEXT:    vslidedown.vi v8, v8, 1
162; CHECK-NEXT:    ret
163entry:
164  %s = shufflevector <8 x bfloat> %x, <8 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
165  ret <4 x bfloat> %s
166}
167
; Length-changing shuffle: extracts the 4 contiguous elements 3..6 of an
; <8 x bfloat> input. The expected lowering is a single vslidedown.vi by 3
; executed at the source vector length (VL = 8).
168define <4 x bfloat> @shuffle_v8bf16_to_vslidedown_3(<8 x bfloat> %x) {
169; CHECK-LABEL: shuffle_v8bf16_to_vslidedown_3:
170; CHECK:       # %bb.0: # %entry
171; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
172; CHECK-NEXT:    vslidedown.vi v8, v8, 3
173; CHECK-NEXT:    ret
174entry:
175  %s = shufflevector <8 x bfloat> %x, <8 x bfloat> poison, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
176  ret <4 x bfloat> %s
177}
178
; <8 x half> counterpart of the bfloat extract-from-offset-1 test: elements
; 1..4 are extracted into a <4 x half>. The expected lowering is a single
; vslidedown.vi by 1 at VL = 8.
179define <4 x half> @shuffle_v8f16_to_vslidedown_1(<8 x half> %x) {
180; CHECK-LABEL: shuffle_v8f16_to_vslidedown_1:
181; CHECK:       # %bb.0: # %entry
182; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
183; CHECK-NEXT:    vslidedown.vi v8, v8, 1
184; CHECK-NEXT:    ret
185entry:
186  %s = shufflevector <8 x half> %x, <8 x half> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
187  ret <4 x half> %s
188}
189
; <8 x half> counterpart of the bfloat extract-from-offset-3 test: elements
; 3..6 are extracted into a <4 x half>. The expected lowering is a single
; vslidedown.vi by 3 at VL = 8.
190define <4 x half> @shuffle_v8f16_to_vslidedown_3(<8 x half> %x) {
191; CHECK-LABEL: shuffle_v8f16_to_vslidedown_3:
192; CHECK:       # %bb.0: # %entry
193; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
194; CHECK-NEXT:    vslidedown.vi v8, v8, 3
195; CHECK-NEXT:    ret
196entry:
197  %s = shufflevector <8 x half> %x, <8 x half> poison, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
198  ret <4 x half> %s
199}
200
; Length-changing shuffle on 32-bit elements: extracts elements 1 and 2 of a
; <4 x float> into a <2 x float>. The expected lowering is a single
; vslidedown.vi by 1 at the source vector length (VL = 4).
201define <2 x float> @shuffle_v4f32_to_vslidedown(<4 x float> %x) {
202; CHECK-LABEL: shuffle_v4f32_to_vslidedown:
203; CHECK:       # %bb.0: # %entry
204; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
205; CHECK-NEXT:    vslidedown.vi v8, v8, 1
206; CHECK-NEXT:    ret
207entry:
208  %s = shufflevector <4 x float> %x, <4 x float> poison, <2 x i32> <i32 1, i32 2>
209  ret <2 x float> %s
210}
211
; Same-length single-source shuffle <1, 2, 3, poison>: lanes 0-2 read elements
; 1-3 and the last lane is a don't-care, so the expected lowering is a single
; vslidedown.vi by 1. The don't-care lane is spelled `poison` (the canonical
; shufflevector mask form) rather than the deprecated `undef`.
212define <4 x bfloat> @slidedown_v4bf16(<4 x bfloat> %x) {
213; CHECK-LABEL: slidedown_v4bf16:
214; CHECK:       # %bb.0:
215; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
216; CHECK-NEXT:    vslidedown.vi v8, v8, 1
217; CHECK-NEXT:    ret
218  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 poison>
219  ret <4 x bfloat> %s
220}
221
; <4 x half> counterpart of the bfloat slidedown test: mask <1, 2, 3, poison>
; with a don't-care last lane, expected to lower to a single vslidedown.vi by
; 1. The don't-care lane is spelled `poison` (the canonical shufflevector mask
; form) rather than the deprecated `undef`.
222define <4 x half> @slidedown_v4f16(<4 x half> %x) {
223; CHECK-LABEL: slidedown_v4f16:
224; CHECK:       # %bb.0:
225; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
226; CHECK-NEXT:    vslidedown.vi v8, v8, 1
227; CHECK-NEXT:    ret
228  %s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 poison>
229  ret <4 x half> %s
230}
231
; Sparse slidedown pattern: only lanes 0, 2 and 3 are constrained (elements 3,
; 5 and 6, i.e. lane + 3); every other lane is a don't-care. The expected
; lowering is still a single vslidedown.vi by 3. Don't-care lanes are spelled
; `poison` (the canonical shufflevector mask form) rather than the deprecated
; `undef`.
232define <8 x float> @slidedown_v8f32(<8 x float> %x) {
233; CHECK-LABEL: slidedown_v8f32:
234; CHECK:       # %bb.0:
235; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
236; CHECK-NEXT:    vslidedown.vi v8, v8, 3
237; CHECK-NEXT:    ret
238  %s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 3, i32 poison, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
239  ret <8 x float> %s
240}
241
; Single-source shuffle <poison, 0, 1, 2>: lanes 1-3 read elements 0-2 and
; lane 0 is a don't-care. The expected lowering is a vslideup.vi by 1 into a
; fresh register followed by a whole-register copy back to the result
; register. The don't-care lane is spelled `poison` (the canonical
; shufflevector mask form) rather than the deprecated `undef`.
242define <4 x half> @slideup_v4f16(<4 x half> %x) {
243; CHECK-LABEL: slideup_v4f16:
244; CHECK:       # %bb.0:
245; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
246; CHECK-NEXT:    vslideup.vi v9, v8, 1
247; CHECK-NEXT:    vmv1r.v v8, v9
248; CHECK-NEXT:    ret
249  %s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
250  ret <4 x half> %s
251}
252
; Single-source shuffle whose lanes 4-7 read elements 1-4 (lane - 3) while
; lanes 0-3 are don't-cares. The expected lowering is a vslideup.vi by 3 into
; a fresh register group followed by a vmv.v.v copy to the result register.
; Don't-care lanes are spelled `poison` (the canonical shufflevector mask
; form) rather than the deprecated `undef`.
253define <8 x float> @slideup_v8f32(<8 x float> %x) {
254; CHECK-LABEL: slideup_v8f32:
255; CHECK:       # %bb.0:
256; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
257; CHECK-NEXT:    vslideup.vi v10, v8, 3
258; CHECK-NEXT:    vmv.v.v v8, v10
259; CHECK-NEXT:    ret
260  %s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 2, i32 3, i32 4>
261  ret <8 x float> %s
262}
263
; Single-source rotate by one element: mask <1, 2, ..., 7, 0>. The expected
; lowering is a vslidedown.vi by 1 (lanes 0-6) combined with a vslideup.vi by
; 7 (element 0 into lane 7) of the same source, then a copy to the result.
264define <8 x float> @splice_unary(<8 x float> %x) {
265; CHECK-LABEL: splice_unary:
266; CHECK:       # %bb.0:
267; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
268; CHECK-NEXT:    vslidedown.vi v10, v8, 1
269; CHECK-NEXT:    vslideup.vi v10, v8, 7
270; CHECK-NEXT:    vmv.v.v v8, v10
271; CHECK-NEXT:    ret
272  %s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
273  ret <8 x float> %s
274}
275
; Single-source rotate on 64-bit elements: mask <6, 7, 0, 1, 2, 3, 4, 5>. The
; expected lowering is a vslidedown.vi by 6 (elements 6-7 into lanes 0-1)
; combined with a vslideup.vi by 2 (elements 0-5 into lanes 2-7), then a copy
; to the result.
276define <8 x double> @splice_unary2(<8 x double> %x) {
277; CHECK-LABEL: splice_unary2:
278; CHECK:       # %bb.0:
279; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
280; CHECK-NEXT:    vslidedown.vi v12, v8, 6
281; CHECK-NEXT:    vslideup.vi v12, v8, 2
282; CHECK-NEXT:    vmv.v.v v8, v12
283; CHECK-NEXT:    ret
284  %s = shufflevector <8 x double> %x, <8 x double> poison, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
285  ret <8 x double> %s
286}
287
; Two-source splice: lanes 0-5 read elements 2-7 of %x, lane 6 is a
; don't-care and lane 7 reads element 1 of %y (mask index 9). The expected
; lowering is an in-place vslidedown.vi by 2 followed by a vslideup.vi by 6 of
; the second source. The don't-care lane is spelled `poison` (the canonical
; shufflevector mask form) rather than the deprecated `undef`.
288define <8 x float> @splice_binary(<8 x float> %x, <8 x float> %y) {
289; CHECK-LABEL: splice_binary:
290; CHECK:       # %bb.0:
291; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
292; CHECK-NEXT:    vslidedown.vi v8, v8, 2
293; CHECK-NEXT:    vslideup.vi v8, v10, 6
294; CHECK-NEXT:    ret
295  %s = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 9>
296  ret <8 x float> %s
297}
298
; Two-source splice with the sources in the opposite order: lanes 0-2 read
; elements 5-7 of %y (mask indices 13-15) and lanes 3-7 read elements 0-4 of
; %x. The expected lowering is a vslidedown.vi by 5 followed by a vslideup.vi
; by 3, then a copy to the result register.
299define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) {
300; CHECK-LABEL: splice_binary2:
301; CHECK:       # %bb.0:
302; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
303; CHECK-NEXT:    vslidedown.vi v12, v12, 5
304; CHECK-NEXT:    vslideup.vi v12, v8, 3
305; CHECK-NEXT:    vmv.v.v v8, v12
306; CHECK-NEXT:    ret
307  %s = shufflevector <8 x double> %x, <8 x double> %y, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
308  ret <8 x double> %s
309}
310
; Single-source permute <1, 2, 0, 1> on <4 x bfloat>. As in the f64 variant,
; the index vector is built from the scalar 0x01000201 (lui 4096 + addi 513,
; bytes 1, 2, 0, 1) and sign-extended to 16 bits; because the data elements
; are also 16 bits wide the gather is a plain vrgather.vv.
311define <4 x bfloat> @vrgather_permute_shuffle_vu_v4bf16(<4 x bfloat> %x) {
312; CHECK-LABEL: vrgather_permute_shuffle_vu_v4bf16:
313; CHECK:       # %bb.0:
314; CHECK-NEXT:    lui a0, 4096
315; CHECK-NEXT:    addi a0, a0, 513
316; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
317; CHECK-NEXT:    vmv.s.x v9, a0
318; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
319; CHECK-NEXT:    vsext.vf2 v10, v9
320; CHECK-NEXT:    vrgather.vv v9, v8, v10
321; CHECK-NEXT:    vmv1r.v v8, v9
322; CHECK-NEXT:    ret
323  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
324  ret <4 x bfloat> %s
325}
326
; Two-source permute <1, 2, 0, 5> on <4 x bfloat>: lanes 0-2 read elements
; 1, 2, 0 of %x and lane 3 reads element 1 of %y. The expected lowering is an
; unmasked vrgather.vv with indices loaded from the constant pool, followed by
; a masked vrgather.vi (mask 8 = lane 3 only) for the second source.
327define <4 x bfloat> @vrgather_shuffle_vv_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
328; CHECK-LABEL: vrgather_shuffle_vv_v4bf16:
329; CHECK:       # %bb.0:
330; CHECK-NEXT:    lui a0, %hi(.LCPI25_0)
331; CHECK-NEXT:    addi a0, a0, %lo(.LCPI25_0)
332; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
333; CHECK-NEXT:    vle16.v v11, (a0)
334; CHECK-NEXT:    vmv.v.i v0, 8
335; CHECK-NEXT:    vrgather.vv v10, v8, v11
336; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
337; CHECK-NEXT:    vmv1r.v v8, v10
338; CHECK-NEXT:    ret
339  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
340  ret <4 x bfloat> %s
341}
342
; Broadcast of element 1 of a vector loaded from %p. The expected lowering
; does not load the vector at all: it reads the 16-bit element with a scalar
; lh at byte offset 2 and splats it with vmv.v.x. The unused second shuffle
; operand is spelled `poison` rather than the deprecated `undef`; the mask
; never selects from it.
343define <4 x bfloat> @vrgather_shuffle_vx_v4bf16_load(ptr %p) {
344; CHECK-LABEL: vrgather_shuffle_vx_v4bf16_load:
345; CHECK:       # %bb.0:
346; CHECK-NEXT:    lh a0, 2(a0)
347; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
348; CHECK-NEXT:    vmv.v.x v8, a0
349; CHECK-NEXT:    ret
350  %v = load <4 x bfloat>, ptr %p
351  %s = shufflevector <4 x bfloat> %v, <4 x bfloat> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
352  ret <4 x bfloat> %s
353}
354
; <4 x half> counterpart of the bfloat single-source permute <1, 2, 0, 1>.
; The index vector is built from the scalar 0x01000201 (lui 4096 + addi 513,
; bytes 1, 2, 0, 1), sign-extended to 16 bits and consumed by vrgather.vv.
355define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
356; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f16:
357; CHECK:       # %bb.0:
358; CHECK-NEXT:    lui a0, 4096
359; CHECK-NEXT:    addi a0, a0, 513
360; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
361; CHECK-NEXT:    vmv.s.x v9, a0
362; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
363; CHECK-NEXT:    vsext.vf2 v10, v9
364; CHECK-NEXT:    vrgather.vv v9, v8, v10
365; CHECK-NEXT:    vmv1r.v v8, v9
366; CHECK-NEXT:    ret
367  %s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
368  ret <4 x half> %s
369}
370
; <4 x half> counterpart of the bfloat two-source permute <1, 2, 0, 5>: an
; unmasked vrgather.vv (indices from the constant pool) covers the lanes taken
; from %x, and a masked vrgather.vi (mask 8 = lane 3 only) inserts element 1
; of %y.
371define <4 x half> @vrgather_shuffle_vv_v4f16(<4 x half> %x, <4 x half> %y) {
372; CHECK-LABEL: vrgather_shuffle_vv_v4f16:
373; CHECK:       # %bb.0:
374; CHECK-NEXT:    lui a0, %hi(.LCPI28_0)
375; CHECK-NEXT:    addi a0, a0, %lo(.LCPI28_0)
376; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
377; CHECK-NEXT:    vle16.v v11, (a0)
378; CHECK-NEXT:    vmv.v.i v0, 8
379; CHECK-NEXT:    vrgather.vv v10, v8, v11
380; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
381; CHECK-NEXT:    vmv1r.v v8, v10
382; CHECK-NEXT:    ret
383  %s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
384  ret <4 x half> %s
385}
386
; <4 x half> counterpart of the bfloat load-and-broadcast test: element 1 of
; the vector at %p is splatted. The expected lowering is a scalar lh at byte
; offset 2 followed by vmv.v.x, with no vector load. The unused second shuffle
; operand is spelled `poison` rather than the deprecated `undef`; the mask
; never selects from it.
387define <4 x half> @vrgather_shuffle_vx_v4f16_load(ptr %p) {
388; CHECK-LABEL: vrgather_shuffle_vx_v4f16_load:
389; CHECK:       # %bb.0:
390; CHECK-NEXT:    lh a0, 2(a0)
391; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
392; CHECK-NEXT:    vmv.v.x v8, a0
393; CHECK-NEXT:    ret
394  %v = load <4 x half>, ptr %p
395  %s = shufflevector <4 x half> %v, <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
396  ret <4 x half> %s
397}
398
; Two-source permute in which the elements read from %v (odd indices 1..15)
; and from %w (even indices 0..14, mask values 16..30) never share a lane
; position. The expected lowering first blends both sources into one register
; group with vmerge.vvm (mask 0xAAAA, from lui 11 + addi -1366) and then
; performs a single vrgatherei16.vv, with byte indices loaded from the
; constant pool and sign-extended to 16 bits.
399define <16 x float> @shuffle_disjoint_lanes(<16 x float> %v, <16 x float> %w) {
400; CHECK-LABEL: shuffle_disjoint_lanes:
401; CHECK:       # %bb.0:
402; CHECK-NEXT:    lui a0, %hi(.LCPI30_0)
403; CHECK-NEXT:    addi a0, a0, %lo(.LCPI30_0)
404; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
405; CHECK-NEXT:    vle8.v v16, (a0)
406; CHECK-NEXT:    lui a0, 11
407; CHECK-NEXT:    addi a0, a0, -1366
408; CHECK-NEXT:    vmv.s.x v0, a0
409; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
410; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
411; CHECK-NEXT:    vsext.vf2 v18, v16
412; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
413; CHECK-NEXT:    vrgatherei16.vv v8, v12, v18
414; CHECK-NEXT:    ret
415  %out = shufflevector <16 x float> %v, <16 x float> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
416  ret <16 x float> %out
417}
418
; Two-source shuffle where every lane taken from %v is an identity lane
; (lanes 0-3 and 8 read the same index) and the remaining lanes permute %w.
; The expected lowering needs no blend: a single masked vrgatherei16.vv
; overwrites lanes 4-7 and 9-15 (mask -272, low 16 bits 0xFEF0) directly in
; the register group that already holds the identity source.
419define <16 x float> @shuffle_disjoint_lanes_one_identity(<16 x float> %v, <16 x float> %w) {
420; CHECK-LABEL: shuffle_disjoint_lanes_one_identity:
421; CHECK:       # %bb.0:
422; CHECK-NEXT:    lui a0, %hi(.LCPI31_0)
423; CHECK-NEXT:    addi a0, a0, %lo(.LCPI31_0)
424; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
425; CHECK-NEXT:    vle16.v v16, (a0)
426; CHECK-NEXT:    li a0, -272
427; CHECK-NEXT:    vmv.s.x v0, a0
428; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
429; CHECK-NEXT:    ret
430  %out = shufflevector <16 x float> %v, <16 x float> %w, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 26, i32 30, i32 22, i32 20, i32 8, i32 31, i32 29, i32 28, i32 27, i32 23, i32 25, i32 22>
431  ret <16 x float> %out
432}
433
; Two-source shuffle where every lane taken from %v reads the same element
; (index 7) and the remaining lanes permute %w. The expected lowering
; broadcasts element 7 with vrgather.vi and then overwrites lanes 4-7 and
; 12-15 (mask 0xF0F0, from lui 15 + addi 240) with a masked vrgatherei16.vv.
434define <16 x float> @shuffle_disjoint_lanes_one_broadcast(<16 x float> %v, <16 x float> %w) {
435; CHECK-LABEL: shuffle_disjoint_lanes_one_broadcast:
436; CHECK:       # %bb.0:
437; CHECK-NEXT:    lui a0, %hi(.LCPI32_0)
438; CHECK-NEXT:    addi a0, a0, %lo(.LCPI32_0)
439; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
440; CHECK-NEXT:    vle16.v v20, (a0)
441; CHECK-NEXT:    lui a0, 15
442; CHECK-NEXT:    addi a0, a0, 240
443; CHECK-NEXT:    vmv.s.x v0, a0
444; CHECK-NEXT:    vrgather.vi v16, v8, 7
445; CHECK-NEXT:    vrgatherei16.vv v16, v12, v20, v0.t
446; CHECK-NEXT:    vmv.v.v v8, v16
447; CHECK-NEXT:    ret
448  %out = shufflevector <16 x float> %v, <16 x float> %w, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 26, i32 30, i32 22, i32 18, i32 7, i32 7, i32 7, i32 7, i32 24, i32 28, i32 20, i32 16>
449  ret <16 x float> %out
450}
451
; Two-source shuffle whose first source is a splat of the scalar argument %v
; (insertelement + zero-mask shufflevector), so the indices used for that
; source do not matter. The expected lowering splats the scalar with
; vfmv.v.f and then overwrites lanes 4-7 and 12-15 (mask 0xF0F0, from
; lui 15 + addi 240) with a masked vrgatherei16.vv from %w.
452define <16 x float> @shuffle_disjoint_lanes_one_splat(float %v, <16 x float> %w) {
453; CHECK-LABEL: shuffle_disjoint_lanes_one_splat:
454; CHECK:       # %bb.0:
455; CHECK-NEXT:    lui a0, %hi(.LCPI33_0)
456; CHECK-NEXT:    addi a0, a0, %lo(.LCPI33_0)
457; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
458; CHECK-NEXT:    vle16.v v16, (a0)
459; CHECK-NEXT:    lui a0, 15
460; CHECK-NEXT:    addi a0, a0, 240
461; CHECK-NEXT:    vmv.s.x v0, a0
462; CHECK-NEXT:    vfmv.v.f v12, fa0
463; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
464; CHECK-NEXT:    vmv.v.v v8, v12
465; CHECK-NEXT:    ret
466  %head = insertelement <16 x float> poison, float %v, i32 0
467  %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
468  %out = shufflevector <16 x float> %splat, <16 x float> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
469  ret <16 x float> %out
470}
471