; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64

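; The <6 x ...> tests below exercise a non-power-of-2 vector length: the i1
; condition vector is loaded as a scalar byte and its bits are expanded one at
; a time with slli/srli plus vslide1down before vmsne.vi forms the mask in v0.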
define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vv_v6i32:
; RV32:       # %bb.0:
; RV32-NEXT:    lbu a2, 0(a2)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v8, (a1)
; RV32-NEXT:    slli a1, a2, 30
; RV32-NEXT:    andi a4, a2, 1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a4
; RV32-NEXT:    slli a4, a2, 29
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    slli a1, a2, 28
; RV32-NEXT:    srli a4, a4, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a4
; RV32-NEXT:    slli a4, a2, 27
; RV32-NEXT:    srli a2, a2, 5
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    srli a4, a4, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vslide1down.vx v10, v10, a4
; RV32-NEXT:    vslide1down.vx v10, v10, a2
; RV32-NEXT:    vslidedown.vi v10, v10, 2
; RV32-NEXT:    vand.vi v10, v10, 1
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
; RV32-NEXT:    vle32.v v8, (a0), v0.t
; RV32-NEXT:    vse32.v v8, (a3)
; RV32-NEXT:    ret
;
; RV64-LABEL: vselect_vv_v6i32:
; RV64:       # %bb.0:
; RV64-NEXT:    lbu a2, 0(a2)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v8, (a1)
; RV64-NEXT:    slli a1, a2, 62
; RV64-NEXT:    andi a4, a2, 1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a4
; RV64-NEXT:    slli a4, a2, 61
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    slli a1, a2, 60
; RV64-NEXT:    srli a4, a4, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a4
; RV64-NEXT:    slli a4, a2, 59
; RV64-NEXT:    srli a2, a2, 5
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    srli a4, a4, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    vslide1down.vx v10, v10, a4
; RV64-NEXT:    vslide1down.vx v10, v10, a2
; RV64-NEXT:    vslidedown.vi v10, v10, 2
; RV64-NEXT:    vand.vi v10, v10, 1
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
; RV64-NEXT:    vle32.v v8, (a0), v0.t
; RV64-NEXT:    vse32.v v8, (a3)
; RV64-NEXT:    ret
  %va = load <6 x i32>, ptr %a
  %vb = load <6 x i32>, ptr %b
  %vcc = load <6 x i1>, ptr %cc
  %vsel = select <6 x i1> %vcc, <6 x i32> %va, <6 x i32> %vb
  store <6 x i32> %vsel, ptr %z
  ret void
}

define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vx_v6i32:
; RV32:       # %bb.0:
; RV32-NEXT:    lbu a2, 0(a2)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v8, (a1)
; RV32-NEXT:    slli a1, a2, 30
; RV32-NEXT:    andi a4, a2, 1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a4
; RV32-NEXT:    slli a4, a2, 29
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    slli a1, a2, 28
; RV32-NEXT:    srli a4, a4, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a4
; RV32-NEXT:    slli a4, a2, 27
; RV32-NEXT:    srli a2, a2, 5
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    srli a4, a4, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vslide1down.vx v10, v10, a4
; RV32-NEXT:    vslide1down.vx v10, v10, a2
; RV32-NEXT:    vslidedown.vi v10, v10, 2
; RV32-NEXT:    vand.vi v10, v10, 1
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vmerge.vxm v8, v8, a0, v0
; RV32-NEXT:    vse32.v v8, (a3)
; RV32-NEXT:    ret
;
; RV64-LABEL: vselect_vx_v6i32:
; RV64:       # %bb.0:
; RV64-NEXT:    lbu a2, 0(a2)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v8, (a1)
; RV64-NEXT:    slli a1, a2, 62
; RV64-NEXT:    andi a4, a2, 1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a4
; RV64-NEXT:    slli a4, a2, 61
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    slli a1, a2, 60
; RV64-NEXT:    srli a4, a4, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a4
; RV64-NEXT:    slli a4, a2, 59
; RV64-NEXT:    srli a2, a2, 5
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    srli a4, a4, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    vslide1down.vx v10, v10, a4
; RV64-NEXT:    vslide1down.vx v10, v10, a2
; RV64-NEXT:    vslidedown.vi v10, v10, 2
; RV64-NEXT:    vand.vi v10, v10, 1
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
; RV64-NEXT:    vse32.v v8, (a3)
; RV64-NEXT:    ret
  %vb = load <6 x i32>, ptr %b
  %ahead = insertelement <6 x i32> poison, i32 %a, i32 0
  %va = shufflevector <6 x i32> %ahead, <6 x i32> poison, <6 x i32> zeroinitializer
  %vcc = load <6 x i1>, ptr %cc
  %vsel = select <6 x i1> %vcc, <6 x i32> %va, <6 x i32> %vb
  store <6 x i32> %vsel, ptr %z
  ret void
}

define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vi_v6i32:
; RV32:       # %bb.0:
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    slli a0, a1, 30
; RV32-NEXT:    andi a3, a1, 1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a3
; RV32-NEXT:    slli a3, a1, 29
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    slli a0, a1, 28
; RV32-NEXT:    srli a3, a3, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    slli a3, a1, 27
; RV32-NEXT:    srli a1, a1, 5
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    srli a3, a3, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vslidedown.vi v10, v10, 2
; RV32-NEXT:    vand.vi v10, v10, 1
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vmerge.vim v8, v8, -1, v0
; RV32-NEXT:    vse32.v v8, (a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: vselect_vi_v6i32:
; RV64:       # %bb.0:
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    slli a0, a1, 62
; RV64-NEXT:    andi a3, a1, 1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a3
; RV64-NEXT:    slli a3, a1, 61
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    slli a0, a1, 60
; RV64-NEXT:    srli a3, a3, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a3
; RV64-NEXT:    slli a3, a1, 59
; RV64-NEXT:    srli a1, a1, 5
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    srli a3, a3, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslide1down.vx v10, v10, a3
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    vslidedown.vi v10, v10, 2
; RV64-NEXT:    vand.vi v10, v10, 1
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vmerge.vim v8, v8, -1, v0
; RV64-NEXT:    vse32.v v8, (a2)
; RV64-NEXT:    ret
  %vb = load <6 x i32>, ptr %b
  %vcc = load <6 x i1>, ptr %cc
  %vsel = select <6 x i1> %vcc, <6 x i32> splat (i32 -1), <6 x i32> %vb
  store <6 x i32> %vsel, ptr %z
  ret void
}


define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vv_v6f32:
; RV32:       # %bb.0:
; RV32-NEXT:    lbu a2, 0(a2)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v8, (a1)
; RV32-NEXT:    slli a1, a2, 30
; RV32-NEXT:    andi a4, a2, 1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a4
; RV32-NEXT:    slli a4, a2, 29
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    slli a1, a2, 28
; RV32-NEXT:    srli a4, a4, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a4
; RV32-NEXT:    slli a4, a2, 27
; RV32-NEXT:    srli a2, a2, 5
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    srli a4, a4, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vslide1down.vx v10, v10, a4
; RV32-NEXT:    vslide1down.vx v10, v10, a2
; RV32-NEXT:    vslidedown.vi v10, v10, 2
; RV32-NEXT:    vand.vi v10, v10, 1
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
; RV32-NEXT:    vle32.v v8, (a0), v0.t
; RV32-NEXT:    vse32.v v8, (a3)
; RV32-NEXT:    ret
;
; RV64-LABEL: vselect_vv_v6f32:
; RV64:       # %bb.0:
; RV64-NEXT:    lbu a2, 0(a2)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v8, (a1)
; RV64-NEXT:    slli a1, a2, 62
; RV64-NEXT:    andi a4, a2, 1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a4
; RV64-NEXT:    slli a4, a2, 61
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    slli a1, a2, 60
; RV64-NEXT:    srli a4, a4, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a4
; RV64-NEXT:    slli a4, a2, 59
; RV64-NEXT:    srli a2, a2, 5
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    srli a4, a4, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    vslide1down.vx v10, v10, a4
; RV64-NEXT:    vslide1down.vx v10, v10, a2
; RV64-NEXT:    vslidedown.vi v10, v10, 2
; RV64-NEXT:    vand.vi v10, v10, 1
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
; RV64-NEXT:    vle32.v v8, (a0), v0.t
; RV64-NEXT:    vse32.v v8, (a3)
; RV64-NEXT:    ret
  %va = load <6 x float>, ptr %a
  %vb = load <6 x float>, ptr %b
  %vcc = load <6 x i1>, ptr %cc
  %vsel = select <6 x i1> %vcc, <6 x float> %va, <6 x float> %vb
  store <6 x float> %vsel, ptr %z
  ret void
}

define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vx_v6f32:
; RV32:       # %bb.0:
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    slli a0, a1, 30
; RV32-NEXT:    andi a3, a1, 1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a3
; RV32-NEXT:    slli a3, a1, 29
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    slli a0, a1, 28
; RV32-NEXT:    srli a3, a3, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    slli a3, a1, 27
; RV32-NEXT:    srli a1, a1, 5
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    srli a3, a3, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vslidedown.vi v10, v10, 2
; RV32-NEXT:    vand.vi v10, v10, 1
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vfmerge.vfm v8, v8, fa0, v0
; RV32-NEXT:    vse32.v v8, (a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: vselect_vx_v6f32:
; RV64:       # %bb.0:
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    slli a0, a1, 62
; RV64-NEXT:    andi a3, a1, 1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a3
; RV64-NEXT:    slli a3, a1, 61
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    slli a0, a1, 60
; RV64-NEXT:    srli a3, a3, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a3
; RV64-NEXT:    slli a3, a1, 59
; RV64-NEXT:    srli a1, a1, 5
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    srli a3, a3, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslide1down.vx v10, v10, a3
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    vslidedown.vi v10, v10, 2
; RV64-NEXT:    vand.vi v10, v10, 1
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vfmerge.vfm v8, v8, fa0, v0
; RV64-NEXT:    vse32.v v8, (a2)
; RV64-NEXT:    ret
  %vb = load <6 x float>, ptr %b
  %ahead = insertelement <6 x float> poison, float %a, i32 0
  %va = shufflevector <6 x float> %ahead, <6 x float> poison, <6 x i32> zeroinitializer
  %vcc = load <6 x i1>, ptr %cc
  %vsel = select <6 x i1> %vcc, <6 x float> %va, <6 x float> %vb
  store <6 x float> %vsel, ptr %z
  ret void
}

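; A select of a floating-point +0.0 splat is lowered as an integer
; vmerge.vim with immediate 0, since +0.0 has an all-zero bit pattern.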
define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vfpzero_v6f32:
; RV32:       # %bb.0:
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    slli a0, a1, 30
; RV32-NEXT:    andi a3, a1, 1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a3
; RV32-NEXT:    slli a3, a1, 29
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    slli a0, a1, 28
; RV32-NEXT:    srli a3, a3, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    slli a3, a1, 27
; RV32-NEXT:    srli a1, a1, 5
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    srli a3, a3, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vslidedown.vi v10, v10, 2
; RV32-NEXT:    vand.vi v10, v10, 1
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vmerge.vim v8, v8, 0, v0
; RV32-NEXT:    vse32.v v8, (a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: vselect_vfpzero_v6f32:
; RV64:       # %bb.0:
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    slli a0, a1, 62
; RV64-NEXT:    andi a3, a1, 1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a3
; RV64-NEXT:    slli a3, a1, 61
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    slli a0, a1, 60
; RV64-NEXT:    srli a3, a3, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a3
; RV64-NEXT:    slli a3, a1, 59
; RV64-NEXT:    srli a1, a1, 5
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    srli a3, a3, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslide1down.vx v10, v10, a3
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    vslidedown.vi v10, v10, 2
; RV64-NEXT:    vand.vi v10, v10, 1
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vmerge.vim v8, v8, 0, v0
; RV64-NEXT:    vse32.v v8, (a2)
; RV64-NEXT:    ret
  %vb = load <6 x float>, ptr %b
  %vcc = load <6 x i1>, ptr %cc
  %vsel = select <6 x i1> %vcc, <6 x float> splat (float 0.0), <6 x float> %vb
  store <6 x float> %vsel, ptr %z
  ret void
}

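; For power-of-2 element counts the i1 condition vector is loaded directly
; into v0 with vlm.v, and RV32 and RV64 produce identical code (shared CHECK
; prefix).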
define void @vselect_vv_v8i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vv_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vlm.v v0, (a2)
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    vse32.v v8, (a3)
; CHECK-NEXT:    ret
  %va = load <8 x i32>, ptr %a
  %vb = load <8 x i32>, ptr %b
  %vcc = load <8 x i1>, ptr %cc
  %vsel = select <8 x i1> %vcc, <8 x i32> %va, <8 x i32> %vb
  store <8 x i32> %vsel, ptr %z
  ret void
}

define void @vselect_vx_v8i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a2)
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT:    vse32.v v8, (a3)
; CHECK-NEXT:    ret
  %vb = load <8 x i32>, ptr %b
  %ahead = insertelement <8 x i32> poison, i32 %a, i32 0
  %va = shufflevector <8 x i32> %ahead, <8 x i32> poison, <8 x i32> zeroinitializer
  %vcc = load <8 x i1>, ptr %cc
  %vsel = select <8 x i1> %vcc, <8 x i32> %va, <8 x i32> %vb
  store <8 x i32> %vsel, ptr %z
  ret void
}

define void @vselect_vi_v8i32(ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vi_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a1)
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    ret
  %vb = load <8 x i32>, ptr %b
  %vcc = load <8 x i1>, ptr %cc
  %vsel = select <8 x i1> %vcc, <8 x i32> splat (i32 -1), <8 x i32> %vb
  store <8 x i32> %vsel, ptr %z
  ret void
}

define void @vselect_vv_v8f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vlm.v v0, (a2)
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    vse32.v v8, (a3)
; CHECK-NEXT:    ret
  %va = load <8 x float>, ptr %a
  %vb = load <8 x float>, ptr %b
  %vcc = load <8 x i1>, ptr %cc
  %vsel = select <8 x i1> %vcc, <8 x float> %va, <8 x float> %vb
  store <8 x float> %vsel, ptr %z
  ret void
}

define void @vselect_vx_v8f32(float %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vx_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a1)
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    ret
  %vb = load <8 x float>, ptr %b
  %ahead = insertelement <8 x float> poison, float %a, i32 0
  %va = shufflevector <8 x float> %ahead, <8 x float> poison, <8 x i32> zeroinitializer
  %vcc = load <8 x i1>, ptr %cc
  %vsel = select <8 x i1> %vcc, <8 x float> %va, <8 x float> %vb
  store <8 x float> %vsel, ptr %z
  ret void
}

define void @vselect_vfpzero_v8f32(ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vfpzero_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a1)
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    ret
  %vb = load <8 x float>, ptr %b
  %vcc = load <8 x i1>, ptr %cc
  %vsel = select <8 x i1> %vcc, <8 x float> splat (float 0.0), <8 x float> %vb
  store <8 x float> %vsel, ptr %z
  ret void
}

define void @vselect_vv_v16i16(ptr %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vv_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT:    vlm.v v0, (a2)
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    vse16.v v8, (a3)
; CHECK-NEXT:    ret
  %va = load <16 x i16>, ptr %a
  %vb = load <16 x i16>, ptr %b
  %vcc = load <16 x i1>, ptr %cc
  %vsel = select <16 x i1> %vcc, <16 x i16> %va, <16 x i16> %vb
  store <16 x i16> %vsel, ptr %z
  ret void
}

define void @vselect_vx_v16i16(i16 signext %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a2)
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT:    vse16.v v8, (a3)
; CHECK-NEXT:    ret
  %vb = load <16 x i16>, ptr %b
  %ahead = insertelement <16 x i16> poison, i16 %a, i32 0
  %va = shufflevector <16 x i16> %ahead, <16 x i16> poison, <16 x i32> zeroinitializer
  %vcc = load <16 x i1>, ptr %cc
  %vsel = select <16 x i1> %vcc, <16 x i16> %va, <16 x i16> %vb
  store <16 x i16> %vsel, ptr %z
  ret void
}

define void @vselect_vi_v16i16(ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vi_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a1)
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmerge.vim v8, v8, 4, v0
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %vb = load <16 x i16>, ptr %b
  %vcc = load <16 x i1>, ptr %cc
  %vsel = select <16 x i1> %vcc, <16 x i16> splat (i16 4), <16 x i16> %vb
  store <16 x i16> %vsel, ptr %z
  ret void
}

define void @vselect_vv_v32f16(ptr %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vv_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a4, 32
; CHECK-NEXT:    vsetvli zero, a4, e16, m4, ta, mu
; CHECK-NEXT:    vlm.v v0, (a2)
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    vse16.v v8, (a3)
; CHECK-NEXT:    ret
  %va = load <32 x half>, ptr %a
  %vb = load <32 x half>, ptr %b
  %vcc = load <32 x i1>, ptr %cc
  %vsel = select <32 x i1> %vcc, <32 x half> %va, <32 x half> %vb
  store <32 x half> %vsel, ptr %z
  ret void
}

define void @vselect_vx_v32f16(half %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vx_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; CHECK-NEXT:    vlm.v v0, (a1)
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %vb = load <32 x half>, ptr %b
  %ahead = insertelement <32 x half> poison, half %a, i32 0
  %va = shufflevector <32 x half> %ahead, <32 x half> poison, <32 x i32> zeroinitializer
  %vcc = load <32 x i1>, ptr %cc
  %vsel = select <32 x i1> %vcc, <32 x half> %va, <32 x half> %vb
  store <32 x half> %vsel, ptr %z
  ret void
}

define void @vselect_vfpzero_v32f16(ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vfpzero_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; CHECK-NEXT:    vlm.v v0, (a1)
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %vb = load <32 x half>, ptr %b
  %vcc = load <32 x i1>, ptr %cc
  %vsel = select <32 x i1> %vcc, <32 x half> splat (half 0.0), <32 x half> %vb
  store <32 x half> %vsel, ptr %z
  ret void
}

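; Selects on i1 vectors are lowered entirely with mask-register logic:
; result = (a & cc) | (b & ~cc), i.e. vmand/vmandn/vmor.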
define <2 x i1> @vselect_v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %cc) {
; CHECK-LABEL: vselect_v2i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vmandn.mm v8, v8, v9
; CHECK-NEXT:    vmand.mm v9, v0, v9
; CHECK-NEXT:    vmor.mm v0, v9, v8
; CHECK-NEXT:    ret
  %v = select <2 x i1> %cc, <2 x i1> %a, <2 x i1> %b
  ret <2 x i1> %v
}

define <4 x i1> @vselect_v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %cc) {
; CHECK-LABEL: vselect_v4i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vmandn.mm v8, v8, v9
; CHECK-NEXT:    vmand.mm v9, v0, v9
; CHECK-NEXT:    vmor.mm v0, v9, v8
; CHECK-NEXT:    ret
  %v = select <4 x i1> %cc, <4 x i1> %a, <4 x i1> %b
  ret <4 x i1> %v
}

define <8 x i1> @vselect_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %cc) {
; CHECK-LABEL: vselect_v8i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmandn.mm v8, v8, v9
; CHECK-NEXT:    vmand.mm v9, v0, v9
; CHECK-NEXT:    vmor.mm v0, v9, v8
; CHECK-NEXT:    ret
  %v = select <8 x i1> %cc, <8 x i1> %a, <8 x i1> %b
  ret <8 x i1> %v
}

define <16 x i1> @vselect_v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %cc) {
; CHECK-LABEL: vselect_v16i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmandn.mm v8, v8, v9
; CHECK-NEXT:    vmand.mm v9, v0, v9
; CHECK-NEXT:    vmor.mm v0, v9, v8
; CHECK-NEXT:    ret
  %v = select <16 x i1> %cc, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %v
}

define <32 x i1> @vselect_v32i1(<32 x i1> %a, <32 x i1> %b, <32 x i1> %cc) {
; CHECK-LABEL: vselect_v32i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmandn.mm v8, v8, v9
; CHECK-NEXT:    vmand.mm v9, v0, v9
; CHECK-NEXT:    vmor.mm v0, v9, v8
; CHECK-NEXT:    ret
  %v = select <32 x i1> %cc, <32 x i1> %a, <32 x i1> %b
  ret <32 x i1> %v
}

define <64 x i1> @vselect_v64i1(<64 x i1> %a, <64 x i1> %b, <64 x i1> %cc) {
; CHECK-LABEL: vselect_v64i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vmandn.mm v8, v8, v9
; CHECK-NEXT:    vmand.mm v9, v0, v9
; CHECK-NEXT:    vmor.mm v0, v9, v8
; CHECK-NEXT:    ret
  %v = select <64 x i1> %cc, <64 x i1> %a, <64 x i1> %b
  ret <64 x i1> %v
}