; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

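; Each splat test below builds the canonical splat idiom (illustrative, with N
; and ty standing in for each test's width and element type):
;   %a = insertelement <N x ty> poison, ty %y, i32 0
;   %b = shufflevector <N x ty> %a, <N x ty> poison, <N x i32> zeroinitializer
; Per the CHECK lines, a splat of a variable scalar should select a single
; vmv.v.x of the GPR.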
define void @splat_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: splat_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <16 x i8> poison, i8 %y, i32 0
  %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer
  store <16 x i8> %b, ptr %x
  ret void
}

define void @splat_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: splat_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <8 x i16> poison, i16 %y, i32 0
  %b = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> zeroinitializer
  store <8 x i16> %b, ptr %x
  ret void
}

define void @splat_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: splat_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <4 x i32> poison, i32 %y, i32 0
  %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer
  store <4 x i32> %b, ptr %x
  ret void
}

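; On RV32 an i64 scalar argument arrives split across the a1/a2 register pair,
; so the splat below goes through the stack and a zero-strided vlse64.v; on
; RV64 a plain vmv.v.x suffices.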
define void @splat_v2i64(ptr %x, i64 %y) {
; RV32-LABEL: splat_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = insertelement <2 x i64> poison, i64 %y, i32 0
  %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %b, ptr %x
  ret void
}

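; The same splats at doubled element counts should select LMUL=2 (m2) register
; groups. For v32i8 the AVL of 32 no longer fits vsetivli's 5-bit immediate,
; so it is materialized in a GPR for vsetvli.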
define void @splat_v32i8(ptr %x, i8 %y) {
; CHECK-LABEL: splat_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <32 x i8> poison, i8 %y, i32 0
  %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer
  store <32 x i8> %b, ptr %x
  ret void
}

define void @splat_v16i16(ptr %x, i16 %y) {
; CHECK-LABEL: splat_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <16 x i16> poison, i16 %y, i32 0
  %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
  store <16 x i16> %b, ptr %x
  ret void
}

define void @splat_v8i32(ptr %x, i32 %y) {
; CHECK-LABEL: splat_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <8 x i32> poison, i32 %y, i32 0
  %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
  store <8 x i32> %b, ptr %x
  ret void
}

define void @splat_v4i64(ptr %x, i64 %y) {
; RV32-LABEL: splat_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.v.x v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = insertelement <4 x i64> poison, i64 %y, i32 0
  %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer
  store <4 x i64> %b, ptr %x
  ret void
}

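; Splats of constant zero should fold to vmv.v.i with immediate 0; no scalar
; operand is needed.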
define void @splat_zero_v16i8(ptr %x) {
; CHECK-LABEL: splat_zero_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> splat (i8 0), ptr %x
  ret void
}

define void @splat_zero_v8i16(ptr %x) {
; CHECK-LABEL: splat_zero_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i16> splat (i16 0), ptr %x
  ret void
}

define void @splat_zero_v4i32(ptr %x) {
; CHECK-LABEL: splat_zero_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i32> splat (i32 0), ptr %x
  ret void
}

define void @splat_zero_v2i64(ptr %x) {
; CHECK-LABEL: splat_zero_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <2 x i64> splat (i64 0), ptr %x
  ret void
}

define void @splat_zero_v32i8(ptr %x) {
; CHECK-LABEL: splat_zero_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <32 x i8> splat (i8 0), ptr %x
  ret void
}

define void @splat_zero_v16i16(ptr %x) {
; CHECK-LABEL: splat_zero_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i16> splat (i16 0), ptr %x
  ret void
}

define void @splat_zero_v8i32(ptr %x) {
; CHECK-LABEL: splat_zero_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i32> splat (i32 0), ptr %x
  ret void
}

define void @splat_zero_v4i64(ptr %x) {
; CHECK-LABEL: splat_zero_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i64> splat (i64 0), ptr %x
  ret void
}

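; A zero splat whose total size fits a naturally aligned scalar store should
; lower to a single store of the zero register (sw or sd) rather than a vector
; store; the underaligned case below keeps the vector lowering.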
define void @splat_zero_v2i16(ptr %p) {
; CHECK-LABEL: splat_zero_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sw zero, 0(a0)
; CHECK-NEXT:    ret
  store <2 x i16> zeroinitializer, ptr %p
  ret void
}

define void @splat_zero_v2i16_unaligned(ptr %p) {
; CHECK-LABEL: splat_zero_v2i16_unaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <2 x i16> zeroinitializer, ptr %p, align 1
  ret void
}

define void @splat_zero_v4i16(ptr %p) {
; RV32-LABEL: splat_zero_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vse16.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_zero_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    sd zero, 0(a0)
; RV64-NEXT:    ret
  store <4 x i16> zeroinitializer, ptr %p
  ret void
}

define void @splat_zero_v2i32(ptr %p) {
; RV32-LABEL: splat_zero_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_zero_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    sd zero, 0(a0)
; RV64-NEXT:    ret
  store <2 x i32> zeroinitializer, ptr %p
  ret void
}

; The element count is not a power of two and storing it would take more than
; two scalar stores, so the vector lowering is kept.
define void @splat_zero_v7i16(ptr %p) {
; CHECK-LABEL: splat_zero_v7i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <7 x i16> zeroinitializer, ptr %p
  ret void
}

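; Splats of all-ones should likewise fold to vmv.v.i, here with immediate -1.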
define void @splat_allones_v16i8(ptr %x) {
; CHECK-LABEL: splat_allones_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> splat (i8 -1), ptr %x
  ret void
}

define void @splat_allones_v8i16(ptr %x) {
; CHECK-LABEL: splat_allones_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i16> splat (i16 -1), ptr %x
  ret void
}

define void @splat_allones_v4i32(ptr %x) {
; CHECK-LABEL: splat_allones_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i32> splat (i32 -1), ptr %x
  ret void
}

define void @splat_allones_v2i64(ptr %x) {
; CHECK-LABEL: splat_allones_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <2 x i64> splat (i64 -1), ptr %x
  ret void
}

define void @splat_allones_v32i8(ptr %x) {
; CHECK-LABEL: splat_allones_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <32 x i8> splat (i8 -1), ptr %x
  ret void
}

define void @splat_allones_v16i16(ptr %x) {
; CHECK-LABEL: splat_allones_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i16> splat (i16 -1), ptr %x
  ret void
}

define void @splat_allones_v8i32(ptr %x) {
; CHECK-LABEL: splat_allones_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i32> splat (i32 -1), ptr %x
  ret void
}

define void @splat_allones_v4i64(ptr %x) {
; CHECK-LABEL: splat_allones_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i64> splat (i64 -1), ptr %x
  ret void
}

; This requires a bitcast on RV32 due to type legalization rewriting the
; build_vector to v8i32.
; FIXME: We should prevent this and use the implicit sign extension of vmv.v.x
; with SEW=64 on RV32.
define void @splat_allones_with_use_v4i64(ptr %x) {
; CHECK-LABEL: splat_allones_with_use_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = add <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
  store <4 x i64> %b, ptr %x
  ret void
}

; This test used to crash at LMUL=8 when inserting a v16i64 subvector into
; nxv8i64 at index 0: the v16i64 type was used to get the LMUL, the size of
; which exceeded the maximum expected size of 512. The scalable container type
; nxv8i64 should have been used instead.
define void @vadd_vx_v16i64(ptr %a, i64 %b, ptr %c) {
; RV32-LABEL: vadd_vx_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    vse64.v v8, (a3)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vadd_vx_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vadd.vx v8, v8, a1
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ret
  %va = load <16 x i64>, ptr %a
  %head = insertelement <16 x i64> poison, i64 %b, i32 0
  %splat = shufflevector <16 x i64> %head, <16 x i64> poison, <16 x i32> zeroinitializer
  %vc = add <16 x i64> %va, %splat
  store <16 x i64> %vc, ptr %c
  ret void
}