; xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll (revision 3cf15af2daa9177a5604d122a9c5cbcf86f7fe33)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV32
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV64
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64

define void @splat_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: splat_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <16 x i8> undef, i8 %y, i32 0
  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
  store <16 x i8> %b, <16 x i8>* %x
  ret void
}

define void @splat_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: splat_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <8 x i16> undef, i16 %y, i32 0
  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
  store <8 x i16> %b, <8 x i16>* %x
  ret void
}

define void @splat_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: splat_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <4 x i32> undef, i32 %y, i32 0
  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %b, <4 x i32>* %x
  ret void
}

define void @splat_v2i64(<2 x i64>* %x, i64 %y) {
; LMULMAX8-RV32-LABEL: splat_v2i64:
; LMULMAX8-RV32:       # %bb.0:
; LMULMAX8-RV32-NEXT:    addi sp, sp, -16
; LMULMAX8-RV32-NEXT:    .cfi_def_cfa_offset 16
; LMULMAX8-RV32-NEXT:    sw a2, 12(sp)
; LMULMAX8-RV32-NEXT:    sw a1, 8(sp)
; LMULMAX8-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX8-RV32-NEXT:    addi a1, sp, 8
; LMULMAX8-RV32-NEXT:    vlse64.v v8, (a1), zero
; LMULMAX8-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX8-RV32-NEXT:    addi sp, sp, 16
; LMULMAX8-RV32-NEXT:    ret
;
; LMULMAX2-RV32-LABEL: splat_v2i64:
; LMULMAX2-RV32:       # %bb.0:
; LMULMAX2-RV32-NEXT:    addi sp, sp, -16
; LMULMAX2-RV32-NEXT:    .cfi_def_cfa_offset 16
; LMULMAX2-RV32-NEXT:    sw a2, 12(sp)
; LMULMAX2-RV32-NEXT:    sw a1, 8(sp)
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX2-RV32-NEXT:    addi a1, sp, 8
; LMULMAX2-RV32-NEXT:    vlse64.v v8, (a1), zero
; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    addi sp, sp, 16
; LMULMAX2-RV32-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: splat_v2i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    addi sp, sp, -16
; LMULMAX1-RV32-NEXT:    .cfi_def_cfa_offset 16
; LMULMAX1-RV32-NEXT:    sw a2, 12(sp)
; LMULMAX1-RV32-NEXT:    sw a1, 8(sp)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV32-NEXT:    addi a1, sp, 8
; LMULMAX1-RV32-NEXT:    vlse64.v v8, (a1), zero
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi sp, sp, 16
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX8-RV64-LABEL: splat_v2i64:
; LMULMAX8-RV64:       # %bb.0:
; LMULMAX8-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX8-RV64-NEXT:    vmv.v.x v8, a1
; LMULMAX8-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX8-RV64-NEXT:    ret
;
; LMULMAX2-RV64-LABEL: splat_v2i64:
; LMULMAX2-RV64:       # %bb.0:
; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX2-RV64-NEXT:    vmv.v.x v8, a1
; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: splat_v2i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vmv.v.x v8, a1
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    ret
  %a = insertelement <2 x i64> undef, i64 %y, i32 0
  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %b, <2 x i64>* %x
  ret void
}

define void @splat_v32i8(<32 x i8>* %x, i8 %y) {
; LMULMAX8-LABEL: splat_v32i8:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    li a2, 32
; LMULMAX8-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX8-NEXT:    vmv.v.x v8, a1
; LMULMAX8-NEXT:    vse8.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: splat_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vmv.v.x v8, a1
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: splat_v32i8:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT:    vmv.v.x v8, a1
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vse8.v v8, (a1)
; LMULMAX1-NEXT:    vse8.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %a = insertelement <32 x i8> undef, i8 %y, i32 0
  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
  store <32 x i8> %b, <32 x i8>* %x
  ret void
}

define void @splat_v16i16(<16 x i16>* %x, i16 %y) {
; LMULMAX8-LABEL: splat_v16i16:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX8-NEXT:    vmv.v.x v8, a1
; LMULMAX8-NEXT:    vse16.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: splat_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-NEXT:    vmv.v.x v8, a1
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: splat_v16i16:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-NEXT:    vmv.v.x v8, a1
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vse16.v v8, (a1)
; LMULMAX1-NEXT:    vse16.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %a = insertelement <16 x i16> undef, i16 %y, i32 0
  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
  store <16 x i16> %b, <16 x i16>* %x
  ret void
}

define void @splat_v8i32(<8 x i32>* %x, i32 %y) {
; LMULMAX8-LABEL: splat_v8i32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX8-NEXT:    vmv.v.x v8, a1
; LMULMAX8-NEXT:    vse32.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: splat_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vmv.v.x v8, a1
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: splat_v8i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vmv.v.x v8, a1
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vse32.v v8, (a1)
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %a = insertelement <8 x i32> undef, i32 %y, i32 0
  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
  store <8 x i32> %b, <8 x i32>* %x
  ret void
}

define void @splat_v4i64(<4 x i64>* %x, i64 %y) {
; LMULMAX8-RV32-LABEL: splat_v4i64:
; LMULMAX8-RV32:       # %bb.0:
; LMULMAX8-RV32-NEXT:    addi sp, sp, -16
; LMULMAX8-RV32-NEXT:    .cfi_def_cfa_offset 16
; LMULMAX8-RV32-NEXT:    sw a2, 12(sp)
; LMULMAX8-RV32-NEXT:    sw a1, 8(sp)
; LMULMAX8-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX8-RV32-NEXT:    addi a1, sp, 8
; LMULMAX8-RV32-NEXT:    vlse64.v v8, (a1), zero
; LMULMAX8-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX8-RV32-NEXT:    addi sp, sp, 16
; LMULMAX8-RV32-NEXT:    ret
;
; LMULMAX2-RV32-LABEL: splat_v4i64:
; LMULMAX2-RV32:       # %bb.0:
; LMULMAX2-RV32-NEXT:    addi sp, sp, -16
; LMULMAX2-RV32-NEXT:    .cfi_def_cfa_offset 16
; LMULMAX2-RV32-NEXT:    sw a2, 12(sp)
; LMULMAX2-RV32-NEXT:    sw a1, 8(sp)
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-RV32-NEXT:    addi a1, sp, 8
; LMULMAX2-RV32-NEXT:    vlse64.v v8, (a1), zero
; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    addi sp, sp, 16
; LMULMAX2-RV32-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: splat_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    li a3, 5
; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a3
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vmv.v.x v8, a2
; LMULMAX1-RV32-NEXT:    vmerge.vxm v8, v8, a1, v0
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX8-RV64-LABEL: splat_v4i64:
; LMULMAX8-RV64:       # %bb.0:
; LMULMAX8-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX8-RV64-NEXT:    vmv.v.x v8, a1
; LMULMAX8-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX8-RV64-NEXT:    ret
;
; LMULMAX2-RV64-LABEL: splat_v4i64:
; LMULMAX2-RV64:       # %bb.0:
; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-RV64-NEXT:    vmv.v.x v8, a1
; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: splat_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vmv.v.x v8, a1
; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a1)
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    ret
  %a = insertelement <4 x i64> undef, i64 %y, i32 0
  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
  store <4 x i64> %b, <4 x i64>* %x
  ret void
}

define void @splat_zero_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: splat_zero_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <16 x i8> undef, i8 0, i32 0
  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
  store <16 x i8> %b, <16 x i8>* %x
  ret void
}

define void @splat_zero_v8i16(<8 x i16>* %x) {
; CHECK-LABEL: splat_zero_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <8 x i16> undef, i16 0, i32 0
  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
  store <8 x i16> %b, <8 x i16>* %x
  ret void
}

define void @splat_zero_v4i32(<4 x i32>* %x) {
; CHECK-LABEL: splat_zero_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <4 x i32> undef, i32 0, i32 0
  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %b, <4 x i32>* %x
  ret void
}

define void @splat_zero_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: splat_zero_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <2 x i64> undef, i64 0, i32 0
  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %b, <2 x i64>* %x
  ret void
}

define void @splat_zero_v32i8(<32 x i8>* %x) {
; LMULMAX8-LABEL: splat_zero_v32i8:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    li a1, 32
; LMULMAX8-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
; LMULMAX8-NEXT:    vmv.v.i v8, 0
; LMULMAX8-NEXT:    vse8.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: splat_zero_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a1, 32
; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
; LMULMAX2-NEXT:    vmv.v.i v8, 0
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: splat_zero_v32i8:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT:    vmv.v.i v8, 0
; LMULMAX1-NEXT:    vse8.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vse8.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %a = insertelement <32 x i8> undef, i8 0, i32 0
  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
  store <32 x i8> %b, <32 x i8>* %x
  ret void
}

define void @splat_zero_v16i16(<16 x i16>* %x) {
; LMULMAX8-LABEL: splat_zero_v16i16:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX8-NEXT:    vmv.v.i v8, 0
; LMULMAX8-NEXT:    vse16.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: splat_zero_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-NEXT:    vmv.v.i v8, 0
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: splat_zero_v16i16:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-NEXT:    vmv.v.i v8, 0
; LMULMAX1-NEXT:    vse16.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vse16.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %a = insertelement <16 x i16> undef, i16 0, i32 0
  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
  store <16 x i16> %b, <16 x i16>* %x
  ret void
}

define void @splat_zero_v8i32(<8 x i32>* %x) {
; LMULMAX8-LABEL: splat_zero_v8i32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX8-NEXT:    vmv.v.i v8, 0
; LMULMAX8-NEXT:    vse32.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: splat_zero_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vmv.v.i v8, 0
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: splat_zero_v8i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vmv.v.i v8, 0
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %a = insertelement <8 x i32> undef, i32 0, i32 0
  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
  store <8 x i32> %b, <8 x i32>* %x
  ret void
}

define void @splat_zero_v4i64(<4 x i64>* %x) {
; LMULMAX8-LABEL: splat_zero_v4i64:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX8-NEXT:    vmv.v.i v8, 0
; LMULMAX8-NEXT:    vse64.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: splat_zero_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-NEXT:    vmv.v.i v8, 0
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: splat_zero_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a0, a0, 16
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: splat_zero_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    ret
  %a = insertelement <4 x i64> undef, i64 0, i32 0
  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
  store <4 x i64> %b, <4 x i64>* %x
  ret void
}

define void @splat_allones_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: splat_allones_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <16 x i8> undef, i8 -1, i32 0
  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
  store <16 x i8> %b, <16 x i8>* %x
  ret void
}

define void @splat_allones_v8i16(<8 x i16>* %x) {
; CHECK-LABEL: splat_allones_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <8 x i16> undef, i16 -1, i32 0
  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
  store <8 x i16> %b, <8 x i16>* %x
  ret void
}

define void @splat_allones_v4i32(<4 x i32>* %x) {
; CHECK-LABEL: splat_allones_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <4 x i32> undef, i32 -1, i32 0
  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %b, <4 x i32>* %x
  ret void
}

define void @splat_allones_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: splat_allones_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <2 x i64> undef, i64 -1, i32 0
  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %b, <2 x i64>* %x
  ret void
}

define void @splat_allones_v32i8(<32 x i8>* %x) {
; LMULMAX8-LABEL: splat_allones_v32i8:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    li a1, 32
; LMULMAX8-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
; LMULMAX8-NEXT:    vmv.v.i v8, -1
; LMULMAX8-NEXT:    vse8.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: splat_allones_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a1, 32
; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
; LMULMAX2-NEXT:    vmv.v.i v8, -1
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: splat_allones_v32i8:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT:    vmv.v.i v8, -1
; LMULMAX1-NEXT:    vse8.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vse8.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %a = insertelement <32 x i8> undef, i8 -1, i32 0
  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
  store <32 x i8> %b, <32 x i8>* %x
  ret void
}

define void @splat_allones_v16i16(<16 x i16>* %x) {
; LMULMAX8-LABEL: splat_allones_v16i16:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX8-NEXT:    vmv.v.i v8, -1
; LMULMAX8-NEXT:    vse16.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: splat_allones_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-NEXT:    vmv.v.i v8, -1
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: splat_allones_v16i16:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-NEXT:    vmv.v.i v8, -1
; LMULMAX1-NEXT:    vse16.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vse16.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %a = insertelement <16 x i16> undef, i16 -1, i32 0
  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
  store <16 x i16> %b, <16 x i16>* %x
  ret void
}

define void @splat_allones_v8i32(<8 x i32>* %x) {
; LMULMAX8-LABEL: splat_allones_v8i32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX8-NEXT:    vmv.v.i v8, -1
; LMULMAX8-NEXT:    vse32.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: splat_allones_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vmv.v.i v8, -1
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: splat_allones_v8i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vmv.v.i v8, -1
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %a = insertelement <8 x i32> undef, i32 -1, i32 0
  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
  store <8 x i32> %b, <8 x i32>* %x
  ret void
}

define void @splat_allones_v4i64(<4 x i64>* %x) {
; LMULMAX8-LABEL: splat_allones_v4i64:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX8-NEXT:    vmv.v.i v8, -1
; LMULMAX8-NEXT:    vse64.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: splat_allones_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-NEXT:    vmv.v.i v8, -1
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: splat_allones_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vmv.v.i v8, -1
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a0, a0, 16
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: splat_allones_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vmv.v.i v8, -1
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    ret
  %a = insertelement <4 x i64> undef, i64 -1, i32 0
  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
  store <4 x i64> %b, <4 x i64>* %x
  ret void
}

; This requires a bitcast on RV32 due to type legalization rewriting the
; build_vector to v8i32.
; FIXME: We should prevent this and use the implicit sign extension of vmv.v.x
; with SEW=64 on RV32.
define void @splat_allones_with_use_v4i64(<4 x i64>* %x) {
; LMULMAX8-LABEL: splat_allones_with_use_v4i64:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX8-NEXT:    vle64.v v8, (a0)
; LMULMAX8-NEXT:    vadd.vi v8, v8, -1
; LMULMAX8-NEXT:    vse64.v v8, (a0)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: splat_allones_with_use_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vadd.vi v8, v8, -1
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: splat_allones_with_use_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -1
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: splat_allones_with_use_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a1)
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vadd.vi v8, v8, -1
; LMULMAX1-RV64-NEXT:    vadd.vi v9, v9, -1
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a1)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, <4 x i64>* %x
  %b = add <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
  store <4 x i64> %b, <4 x i64>* %x
  ret void
}

; This test used to crash at LMUL=8 when inserting a v16i64 subvector into
; nxv8i64 at index 0: the v16i64 type was used to get the LMUL, the size of
; which exceeded maximum-expected size of 512. The scalable container type of
; nxv8i64 should have been used instead.
define void @vadd_vx_v16i64(<16 x i64>* %a, i64 %b, <16 x i64>* %c) {
; LMULMAX8-RV32-LABEL: vadd_vx_v16i64:
; LMULMAX8-RV32:       # %bb.0:
; LMULMAX8-RV32-NEXT:    addi sp, sp, -16
; LMULMAX8-RV32-NEXT:    .cfi_def_cfa_offset 16
; LMULMAX8-RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
; LMULMAX8-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX8-RV32-NEXT:    sw a2, 12(sp)
; LMULMAX8-RV32-NEXT:    sw a1, 8(sp)
; LMULMAX8-RV32-NEXT:    addi a0, sp, 8
; LMULMAX8-RV32-NEXT:    vlse64.v v16, (a0), zero
; LMULMAX8-RV32-NEXT:    vadd.vv v8, v8, v16
; LMULMAX8-RV32-NEXT:    vse64.v v8, (a3)
; LMULMAX8-RV32-NEXT:    addi sp, sp, 16
; LMULMAX8-RV32-NEXT:    ret
;
; LMULMAX2-RV32-LABEL: vadd_vx_v16i64:
; LMULMAX2-RV32:       # %bb.0:
; LMULMAX2-RV32-NEXT:    addi a4, a0, 64
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-RV32-NEXT:    vle64.v v8, (a4)
; LMULMAX2-RV32-NEXT:    addi a4, a0, 96
; LMULMAX2-RV32-NEXT:    vle64.v v10, (a4)
; LMULMAX2-RV32-NEXT:    vle64.v v12, (a0)
; LMULMAX2-RV32-NEXT:    addi a0, a0, 32
; LMULMAX2-RV32-NEXT:    vle64.v v14, (a0)
; LMULMAX2-RV32-NEXT:    li a0, 85
; LMULMAX2-RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a0
; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-RV32-NEXT:    vmv.v.x v16, a2
; LMULMAX2-RV32-NEXT:    vmerge.vxm v16, v16, a1, v0
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-RV32-NEXT:    vadd.vv v14, v14, v16
; LMULMAX2-RV32-NEXT:    vadd.vv v12, v12, v16
; LMULMAX2-RV32-NEXT:    vadd.vv v10, v10, v16
; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v16
; LMULMAX2-RV32-NEXT:    addi a0, a3, 64
; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    addi a0, a3, 96
; LMULMAX2-RV32-NEXT:    vse64.v v10, (a0)
; LMULMAX2-RV32-NEXT:    vse64.v v12, (a3)
; LMULMAX2-RV32-NEXT:    addi a0, a3, 32
; LMULMAX2-RV32-NEXT:    vse64.v v14, (a0)
; LMULMAX2-RV32-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: vadd_vx_v16i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    addi a4, a0, 96
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a4)
; LMULMAX1-RV32-NEXT:    addi a4, a0, 112
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a4)
; LMULMAX1-RV32-NEXT:    addi a4, a0, 64
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a4)
; LMULMAX1-RV32-NEXT:    addi a4, a0, 80
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a4)
; LMULMAX1-RV32-NEXT:    addi a4, a0, 32
; LMULMAX1-RV32-NEXT:    vle64.v v12, (a4)
; LMULMAX1-RV32-NEXT:    addi a4, a0, 48
; LMULMAX1-RV32-NEXT:    vle64.v v13, (a4)
; LMULMAX1-RV32-NEXT:    vle64.v v14, (a0)
; LMULMAX1-RV32-NEXT:    addi a0, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v15, (a0)
; LMULMAX1-RV32-NEXT:    li a0, 5
; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vmv.v.x v16, a2
; LMULMAX1-RV32-NEXT:    vmerge.vxm v16, v16, a1, v0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vadd.vv v15, v15, v16
; LMULMAX1-RV32-NEXT:    vadd.vv v14, v14, v16
; LMULMAX1-RV32-NEXT:    vadd.vv v13, v13, v16
; LMULMAX1-RV32-NEXT:    vadd.vv v12, v12, v16
; LMULMAX1-RV32-NEXT:    vadd.vv v11, v11, v16
; LMULMAX1-RV32-NEXT:    vadd.vv v10, v10, v16
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v16
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v16
; LMULMAX1-RV32-NEXT:    addi a0, a3, 96
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a0, a3, 112
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a0)
; LMULMAX1-RV32-NEXT:    addi a0, a3, 64
; LMULMAX1-RV32-NEXT:    vse64.v v10, (a0)
; LMULMAX1-RV32-NEXT:    addi a0, a3, 80
; LMULMAX1-RV32-NEXT:    vse64.v v11, (a0)
; LMULMAX1-RV32-NEXT:    addi a0, a3, 32
; LMULMAX1-RV32-NEXT:    vse64.v v12, (a0)
; LMULMAX1-RV32-NEXT:    addi a0, a3, 48
; LMULMAX1-RV32-NEXT:    vse64.v v13, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v14, (a3)
; LMULMAX1-RV32-NEXT:    addi a0, a3, 16
; LMULMAX1-RV32-NEXT:    vse64.v v15, (a0)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX8-RV64-LABEL: vadd_vx_v16i64:
; LMULMAX8-RV64:       # %bb.0:
; LMULMAX8-RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
; LMULMAX8-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX8-RV64-NEXT:    vadd.vx v8, v8, a1
; LMULMAX8-RV64-NEXT:    vse64.v v8, (a2)
; LMULMAX8-RV64-NEXT:    ret
;
; LMULMAX2-RV64-LABEL: vadd_vx_v16i64:
; LMULMAX2-RV64:       # %bb.0:
; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-RV64-NEXT:    addi a3, a0, 96
; LMULMAX2-RV64-NEXT:    vle64.v v8, (a3)
; LMULMAX2-RV64-NEXT:    addi a3, a0, 32
; LMULMAX2-RV64-NEXT:    vle64.v v10, (a3)
; LMULMAX2-RV64-NEXT:    addi a3, a0, 64
; LMULMAX2-RV64-NEXT:    vle64.v v12, (a3)
; LMULMAX2-RV64-NEXT:    vle64.v v14, (a0)
; LMULMAX2-RV64-NEXT:    vadd.vx v10, v10, a1
; LMULMAX2-RV64-NEXT:    vadd.vx v8, v8, a1
; LMULMAX2-RV64-NEXT:    vadd.vx v12, v12, a1
; LMULMAX2-RV64-NEXT:    vadd.vx v14, v14, a1
; LMULMAX2-RV64-NEXT:    vse64.v v14, (a2)
; LMULMAX2-RV64-NEXT:    addi a0, a2, 64
; LMULMAX2-RV64-NEXT:    vse64.v v12, (a0)
; LMULMAX2-RV64-NEXT:    addi a0, a2, 96
; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    addi a0, a2, 32
; LMULMAX2-RV64-NEXT:    vse64.v v10, (a0)
; LMULMAX2-RV64-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: vadd_vx_v16i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a3, a0, 96
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a3)
; LMULMAX1-RV64-NEXT:    addi a3, a0, 112
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV64-NEXT:    addi a3, a0, 64
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a3)
; LMULMAX1-RV64-NEXT:    addi a3, a0, 48
; LMULMAX1-RV64-NEXT:    vle64.v v12, (a3)
; LMULMAX1-RV64-NEXT:    addi a3, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v13, (a3)
; LMULMAX1-RV64-NEXT:    addi a3, a0, 80
; LMULMAX1-RV64-NEXT:    addi a0, a0, 32
; LMULMAX1-RV64-NEXT:    vle64.v v14, (a0)
; LMULMAX1-RV64-NEXT:    vle64.v v15, (a3)
; LMULMAX1-RV64-NEXT:    vadd.vx v13, v13, a1
; LMULMAX1-RV64-NEXT:    vadd.vx v12, v12, a1
; LMULMAX1-RV64-NEXT:    vadd.vx v14, v14, a1
; LMULMAX1-RV64-NEXT:    vadd.vx v15, v15, a1
; LMULMAX1-RV64-NEXT:    vadd.vx v11, v11, a1
; LMULMAX1-RV64-NEXT:    vadd.vx v10, v10, a1
; LMULMAX1-RV64-NEXT:    vadd.vx v9, v9, a1
; LMULMAX1-RV64-NEXT:    vadd.vx v8, v8, a1
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a2)
; LMULMAX1-RV64-NEXT:    addi a0, a2, 96
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a2, 112
; LMULMAX1-RV64-NEXT:    vse64.v v10, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a2, 64
; LMULMAX1-RV64-NEXT:    vse64.v v11, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a2, 80
; LMULMAX1-RV64-NEXT:    vse64.v v15, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a2, 32
; LMULMAX1-RV64-NEXT:    vse64.v v14, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a2, 48
; LMULMAX1-RV64-NEXT:    vse64.v v12, (a0)
; LMULMAX1-RV64-NEXT:    addi a0, a2, 16
; LMULMAX1-RV64-NEXT:    vse64.v v13, (a0)
; LMULMAX1-RV64-NEXT:    ret
  %va = load <16 x i64>, <16 x i64>* %a
  %head = insertelement <16 x i64> undef, i64 %b, i32 0
  %splat = shufflevector <16 x i64> %head, <16 x i64> undef, <16 x i32> zeroinitializer
  %vc = add <16 x i64> %va, %splat
  store <16 x i64> %vc, <16 x i64>* %c
  ret void
}
