; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

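; These tests build fixed-length vectors whose lanes are scalar binary ops
; (add/sub/mul/udiv/fadd/fdiv), mostly with constant right-hand sides. Where
; every lane uses the same opcode, the scalar ops are expected to fold into a
; single vector op (e.g. a vadd.vv against a constant-pool vector) instead of
; being performed lane by lane before the build_vector.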
define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI0_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
; CHECK-LABEL: add_constant_rhs_8xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI1_0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vslide1down.vx v8, v8, a4
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a5
; CHECK-NEXT:    vslide1down.vx v8, v8, a6
; CHECK-NEXT:    vslide1down.vx v8, v8, a7
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %e4 = add i32 %e, 23
  %e5 = add i32 %f, 23
  %e6 = add i32 %g, 22
  %e7 = add i32 %h, 23
  %v0 = insertelement <8 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %e4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %e5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %e6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %e7, i32 7
  ret <8 x i32> %v7
}


define <4 x i32> @sub_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: sub_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI2_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = sub i32 %a, 23
  %e1 = sub i32 %b, 25
  %e2 = sub i32 %c, 1
  %e3 = sub i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: mul_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI3_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = mul i32 %a, 23
  %e1 = mul i32 %b, 25
  %e2 = mul i32 %c, 27
  %e3 = mul i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: udiv_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_0)
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    lui a1, 524288
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI4_1)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_1)
; CHECK-NEXT:    vslide1down.vx v9, v9, a1
; CHECK-NEXT:    vle32.v v11, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vmulhu.vv v10, v8, v10
; CHECK-NEXT:    vsub.vv v12, v8, v10
; CHECK-NEXT:    vmulhu.vv v9, v12, v9
; CHECK-NEXT:    vadd.vv v9, v9, v10
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vsrl.vv v9, v9, v11
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %e0 = udiv i32 %a, 23
  %e1 = udiv i32 %b, 25
  %e2 = udiv i32 %c, 1
  %e3 = udiv i32 %d, 235
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}


define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fadd_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.v.f v8, fa0
; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = fadd float %a, 23.0
  %e1 = fadd float %b, 25.0
  %e2 = fadd float %c, 2.0
  %e3 = fadd float %d, 23.0
  %v0 = insertelement <4 x float> poison, float %e0, i32 0
  %v1 = insertelement <4 x float> %v0, float %e1, i32 1
  %v2 = insertelement <4 x float> %v1, float %e2, i32 2
  %v3 = insertelement <4 x float> %v2, float %e3, i32 3
  ret <4 x float> %v3
}

define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fdiv_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.v.f v8, fa0
; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
; CHECK-NEXT:    vfdiv.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = fdiv float %a, 23.0
  %e1 = fdiv float %b, 25.0
  %e2 = fdiv float %c, 10.0
  %e3 = fdiv float %d, 23.0
  %v0 = insertelement <4 x float> poison, float %e0, i32 0
  %v1 = insertelement <4 x float> %v0, float %e1, i32 1
  %v2 = insertelement <4 x float> %v1, float %e2, i32 2
  %v3 = insertelement <4 x float> %v2, float %e3, i32 3
  ret <4 x float> %v3
}

define <4 x i32> @add_constant_rhs_splat(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    li a0, 23
; CHECK-NEXT:    vadd.vx v8, v8, a0
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 23
  %e2 = add i32 %c, 23
  %e3 = add i32 %d, 23
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @add_constant_rhs_with_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV32-LABEL: add_constant_rhs_with_identity:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a1, a1, 25
; RV32-NEXT:    addi a2, a2, 1
; RV32-NEXT:    addi a3, a3, 2047
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    addi a0, a3, 308
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: add_constant_rhs_with_identity:
; RV64:       # %bb.0:
; RV64-NEXT:    addiw a1, a1, 25
; RV64-NEXT:    addiw a2, a2, 1
; RV64-NEXT:    addi a3, a3, 2047
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    addiw a0, a3, 308
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    ret
  %e0 = add i32 %a, 0
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV32-LABEL: add_constant_rhs_identity:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a1, a1, 25
; RV32-NEXT:    addi a2, a2, 1
; RV32-NEXT:    addi a3, a3, 2047
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    addi a0, a3, 308
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: add_constant_rhs_identity:
; RV64:       # %bb.0:
; RV64-NEXT:    addiw a1, a1, 25
; RV64-NEXT:    addiw a2, a2, 1
; RV64-NEXT:    addi a3, a3, 2047
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    addiw a0, a3, 308
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    ret
  %e0 = add i32 %a, 0
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @add_constant_rhs_identity2(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_identity2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 23
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %c, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %d, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_inverse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI11_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = sub i32 %a, 1
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @add_constant_rhs_commute(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI12_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 1, %c
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}


define <4 x i32> @add_general_rhs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
; RV32-LABEL: add_general_rhs:
; RV32:       # %bb.0:
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a2, a2, a6
; RV32-NEXT:    add a3, a3, a7
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: add_general_rhs:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    addw a1, a1, a5
; RV64-NEXT:    addw a2, a2, a6
; RV64-NEXT:    addw a3, a3, a7
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    ret
  %e0 = add i32 %a, %e
  %e1 = add i32 %b, %f
  %e2 = add i32 %c, %g
  %e3 = add i32 %d, %h
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @add_general_splat(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; RV32-LABEL: add_general_splat:
; RV32:       # %bb.0:
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a1, a1, a4
; RV32-NEXT:    add a2, a2, a4
; RV32-NEXT:    add a3, a3, a4
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: add_general_splat:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    addw a1, a1, a4
; RV64-NEXT:    addw a2, a2, a4
; RV64-NEXT:    addw a3, a3, a4
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    ret
  %e0 = add i32 %a, %e
  %e1 = add i32 %b, %e
  %e2 = add i32 %c, %e
  %e3 = add i32 %d, %e
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

; This test previously failed with an assertion failure because constant shift
; amounts are type legalized early.
define void @buggy(i32 %0) #0 {
; RV32-LABEL: buggy:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vi v8, v8, 1
; RV32-NEXT:    vrgather.vi v9, v8, 0
; RV32-NEXT:    vse32.v v9, (zero)
; RV32-NEXT:    ret
;
; RV64-LABEL: buggy:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    slli a0, a0, 1
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vor.vi v8, v8, 1
; RV64-NEXT:    vrgather.vi v9, v8, 0
; RV64-NEXT:    vse32.v v9, (zero)
; RV64-NEXT:    ret
entry:
  %mul.us.us.i.3 = shl i32 %0, 1
  %1 = insertelement <4 x i32> zeroinitializer, i32 %mul.us.us.i.3, i64 0
  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = shufflevector <4 x i32> %2, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
  store <4 x i32> %3, ptr null, align 16
  ret void
}


define <8 x i32> @add_constant_rhs_8xi32_vector_in(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_8xi32_vector_in:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 23
; CHECK-NEXT:    addi a1, a1, 25
; CHECK-NEXT:    addi a2, a2, 1
; CHECK-NEXT:    addi a3, a3, 2047
; CHECK-NEXT:    addi a3, a3, 308
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    vmv.s.x v10, a1
; CHECK-NEXT:    vslideup.vi v8, v10, 1
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    vmv.s.x v10, a3
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 3
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <8 x i32> %vin, i32 %e0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 3
  ret <8 x i32> %v3
}

define <8 x i32> @add_constant_rhs_8xi32_vector_in2(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_8xi32_vector_in2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 23
; CHECK-NEXT:    addi a1, a1, 25
; CHECK-NEXT:    addi a2, a2, 1
; CHECK-NEXT:    addi a3, a3, 2047
; CHECK-NEXT:    addi a3, a3, 308
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vmv.s.x v10, a1
; CHECK-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 5
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 6
; CHECK-NEXT:    vmv.s.x v10, a3
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 7
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <8 x i32> %vin, i32 %e0, i32 4
  %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 5
  %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 6
  %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 7
  ret <8 x i32> %v3
}

define <8 x i32> @add_constant_rhs_8xi32_vector_in3(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_8xi32_vector_in3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 23
; CHECK-NEXT:    addi a1, a1, 25
; CHECK-NEXT:    addi a2, a2, 1
; CHECK-NEXT:    addi a3, a3, 2047
; CHECK-NEXT:    addi a3, a3, 308
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    vmv.s.x v10, a1
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vmv.s.x v10, a3
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 6
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <8 x i32> %vin, i32 %e0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 2
  %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 4
  %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 6
  ret <8 x i32> %v3
}

define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_8xi32_partial:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vmv.s.x v12, a1
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI19_0)
; CHECK-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v12, 5
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 6
; CHECK-NEXT:    vmv.s.x v10, a3
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 7
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    ret
  %vadd = add <8 x i32> %vin, <i32 1, i32 2, i32 3, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <8 x i32> %vadd, i32 %e0, i32 4
  %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 5
  %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 6
  %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 7
  ret <8 x i32> %v3
}

; Here we cannot pull the ashr through into the vector domain due to
; the truncate semantics of the build_vector.  Doing so would
; truncate before the ashr instead of after it, so if %a or %b
; is e.g. UINT32_MAX+1 we would get a different result.
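; For example, with %a = 4294967296 (UINT32_MAX+1): ashr by 1 gives
; 2147483648, which truncates to i32 0x80000000, whereas truncating first
; gives i32 0 and the shift then produces 0.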
define <2 x i32> @build_vec_of_trunc_op(i64 %a, i64 %b) {
; RV32-LABEL: build_vec_of_trunc_op:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    slli a1, a1, 31
; RV32-NEXT:    srli a0, a0, 1
; RV32-NEXT:    slli a3, a3, 31
; RV32-NEXT:    srli a2, a2, 1
; RV32-NEXT:    or a0, a0, a1
; RV32-NEXT:    or a2, a2, a3
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: build_vec_of_trunc_op:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    srli a0, a0, 1
; RV64-NEXT:    srli a1, a1, 1
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    ret
entry:
  %conv11.i = ashr i64 %a, 1
  %conv11.2 = ashr i64 %b, 1
  %0 = trunc i64 %conv11.i to i32
  %1 = trunc i64 %conv11.2 to i32
  %2 = insertelement <2 x i32> zeroinitializer, i32 %0, i64 0
  %3 = insertelement <2 x i32> %2, i32 %1, i64 1
  ret <2 x i32> %3
}
