; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

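; Test lowering of fixed-length integer splats (variable scalars, zero, and
; all-ones constants) to RVV instructions on RV32 and RV64.
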
define void @splat_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: splat_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <16 x i8> poison, i8 %y, i32 0
  %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer
  store <16 x i8> %b, ptr %x
  ret void
}

define void @splat_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: splat_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <8 x i16> poison, i16 %y, i32 0
  %b = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> zeroinitializer
  store <8 x i16> %b, ptr %x
  ret void
}

define void @splat_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: splat_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <4 x i32> poison, i32 %y, i32 0
  %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer
  store <4 x i32> %b, ptr %x
  ret void
}

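; On RV32 an i64 scalar lives in a register pair (a1/a2), so the splat spills
; the pair to the stack and broadcasts it with a zero-strided vlse64; RV64 can
; use vmv.v.x directly.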
define void @splat_v2i64(ptr %x, i64 %y) {
; RV32-LABEL: splat_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = insertelement <2 x i64> poison, i64 %y, i32 0
  %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %b, ptr %x
  ret void
}

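; An AVL of 32 does not fit in the 5-bit unsigned immediate of vsetivli, so
; the vector length is materialized in a register and set with vsetvli.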
define void @splat_v32i8(ptr %x, i8 %y) {
; CHECK-LABEL: splat_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <32 x i8> poison, i8 %y, i32 0
  %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer
  store <32 x i8> %b, ptr %x
  ret void
}

define void @splat_v16i16(ptr %x, i16 %y) {
; CHECK-LABEL: splat_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <16 x i16> poison, i16 %y, i32 0
  %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
  store <16 x i16> %b, ptr %x
  ret void
}

define void @splat_v8i32(ptr %x, i32 %y) {
; CHECK-LABEL: splat_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <8 x i32> poison, i32 %y, i32 0
  %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
  store <8 x i32> %b, ptr %x
  ret void
}

define void @splat_v4i64(ptr %x, i64 %y) {
; RV32-LABEL: splat_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.v.x v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = insertelement <4 x i64> poison, i64 %y, i32 0
  %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer
  store <4 x i64> %b, ptr %x
  ret void
}

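; Zero splats are materialized with vmv.v.i and immediate 0 rather than a
; scalar move from a GPR, on both RV32 and RV64 (including SEW=64).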
define void @splat_zero_v16i8(ptr %x) {
; CHECK-LABEL: splat_zero_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> splat (i8 0), ptr %x
  ret void
}

define void @splat_zero_v8i16(ptr %x) {
; CHECK-LABEL: splat_zero_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i16> splat (i16 0), ptr %x
  ret void
}

define void @splat_zero_v4i32(ptr %x) {
; CHECK-LABEL: splat_zero_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i32> splat (i32 0), ptr %x
  ret void
}

define void @splat_zero_v2i64(ptr %x) {
; CHECK-LABEL: splat_zero_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <2 x i64> splat (i64 0), ptr %x
  ret void
}

define void @splat_zero_v32i8(ptr %x) {
; CHECK-LABEL: splat_zero_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <32 x i8> splat (i8 0), ptr %x
  ret void
}

define void @splat_zero_v16i16(ptr %x) {
; CHECK-LABEL: splat_zero_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i16> splat (i16 0), ptr %x
  ret void
}

define void @splat_zero_v8i32(ptr %x) {
; CHECK-LABEL: splat_zero_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i32> splat (i32 0), ptr %x
  ret void
}

define void @splat_zero_v4i64(ptr %x) {
; CHECK-LABEL: splat_zero_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i64> splat (i64 0), ptr %x
  ret void
}

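; A zero splat whose total size fits in a scalar register is lowered to a
; single scalar store when the store is sufficiently aligned: v2i16 uses sw on
; both targets, while the 64-bit cases use sd on RV64 only.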
define void @splat_zero_v2i16(ptr %p) {
; CHECK-LABEL: splat_zero_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sw zero, 0(a0)
; CHECK-NEXT:    ret
  store <2 x i16> zeroinitializer, ptr %p
  ret void
}

define void @splat_zero_v2i16_unaligned(ptr %p) {
; CHECK-LABEL: splat_zero_v2i16_unaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <2 x i16> zeroinitializer, ptr %p, align 1
  ret void
}

define void @splat_zero_v4i16(ptr %p) {
; RV32-LABEL: splat_zero_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vse16.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_zero_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    sd zero, 0(a0)
; RV64-NEXT:    ret
  store <4 x i16> zeroinitializer, ptr %p
  ret void
}

define void @splat_zero_v2i32(ptr %p) {
; RV32-LABEL: splat_zero_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_zero_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    sd zero, 0(a0)
; RV64-NEXT:    ret
  store <2 x i32> zeroinitializer, ptr %p
  ret void
}

; Not a power of two and requires more than two scalar stores.
define void @splat_zero_v7i16(ptr %p) {
; CHECK-LABEL: splat_zero_v7i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <7 x i16> zeroinitializer, ptr %p
  ret void
}

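; An all-ones splat fits the 5-bit signed immediate of vmv.v.i, so -1 is
; splatted directly for every SEW.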
define void @splat_allones_v16i8(ptr %x) {
; CHECK-LABEL: splat_allones_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> splat (i8 -1), ptr %x
  ret void
}

define void @splat_allones_v8i16(ptr %x) {
; CHECK-LABEL: splat_allones_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i16> splat (i16 -1), ptr %x
  ret void
}

define void @splat_allones_v4i32(ptr %x) {
; CHECK-LABEL: splat_allones_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i32> splat (i32 -1), ptr %x
  ret void
}

define void @splat_allones_v2i64(ptr %x) {
; CHECK-LABEL: splat_allones_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <2 x i64> splat (i64 -1), ptr %x
  ret void
}

define void @splat_allones_v32i8(ptr %x) {
; CHECK-LABEL: splat_allones_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <32 x i8> splat (i8 -1), ptr %x
  ret void
}

define void @splat_allones_v16i16(ptr %x) {
; CHECK-LABEL: splat_allones_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i16> splat (i16 -1), ptr %x
  ret void
}

define void @splat_allones_v8i32(ptr %x) {
; CHECK-LABEL: splat_allones_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i32> splat (i32 -1), ptr %x
  ret void
}

define void @splat_allones_v4i64(ptr %x) {
; CHECK-LABEL: splat_allones_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i64> splat (i64 -1), ptr %x
  ret void
}

; This requires a bitcast on RV32 due to type legalization rewriting the
; build_vector to v8i32.
; FIXME: We should prevent this and use the implicit sign extension of vmv.v.x
; with SEW=64 on RV32.
define void @splat_allones_with_use_v4i64(ptr %x) {
; CHECK-LABEL: splat_allones_with_use_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = add <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
  store <4 x i64> %b, ptr %x
  ret void
}

; This test used to crash at LMUL=8 when inserting a v16i64 subvector into
; nxv8i64 at index 0: the v16i64 type was used to compute the LMUL, and its
; size exceeded the maximum expected size of 512. The scalable container type
; nxv8i64 should have been used instead.
define void @vadd_vx_v16i64(ptr %a, i64 %b, ptr %c) {
; RV32-LABEL: vadd_vx_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    vse64.v v8, (a3)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vadd_vx_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vadd.vx v8, v8, a1
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ret
  %va = load <16 x i64>, ptr %a
  %head = insertelement <16 x i64> poison, i64 %b, i32 0
  %splat = shufflevector <16 x i64> %head, <16 x i64> poison, <16 x i32> zeroinitializer
  %vc = add <16 x i64> %va, %splat
  store <16 x i64> %vc, ptr %c
  ret void
}