; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zve32f,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zve32f,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

; Test that limiting ELEN through zve32 scalarizes elements larger than 32 bits
; and disables some fractional LMULs.
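; With zve32f the maximum element width (ELEN) is 32 bits, so i64 and double
; elements cannot be placed in vector registers and those operations are
; expanded to scalar code. A fractional LMUL only has to support SEW up to
; LMUL*ELEN, so with ELEN=32 there is no fractional LMUL for 32-bit elements
; and the smallest legal 32-bit vectors are legalized at LMUL=1.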

; This should use LMUL=1.
define void @add_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: add_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = add <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; i64 vectors should be scalarized
define void @add_v2i64(ptr %x, ptr %y) {
; RV32-LABEL: add_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a2, 0(a1)
; RV32-NEXT:    lw a3, 4(a1)
; RV32-NEXT:    lw a4, 0(a0)
; RV32-NEXT:    lw a5, 4(a0)
; RV32-NEXT:    lw a6, 8(a0)
; RV32-NEXT:    lw a7, 12(a0)
; RV32-NEXT:    lw t0, 12(a1)
; RV32-NEXT:    lw a1, 8(a1)
; RV32-NEXT:    add a3, a5, a3
; RV32-NEXT:    add a2, a4, a2
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a1, a6, a1
; RV32-NEXT:    sltu a4, a2, a4
; RV32-NEXT:    sltu a5, a1, a6
; RV32-NEXT:    add a3, a3, a4
; RV32-NEXT:    add a5, a7, a5
; RV32-NEXT:    sw a2, 0(a0)
; RV32-NEXT:    sw a3, 4(a0)
; RV32-NEXT:    sw a1, 8(a0)
; RV32-NEXT:    sw a5, 12(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: add_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    ld a3, 8(a0)
; RV64-NEXT:    ld a4, 0(a1)
; RV64-NEXT:    ld a1, 8(a1)
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    add a1, a3, a1
; RV64-NEXT:    sd a2, 0(a0)
; RV64-NEXT:    sd a1, 8(a0)
; RV64-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = add <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; This should use LMUL=1 because there are no fractional i32 LMULs with ELEN=32
define void @add_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: add_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = add <2 x i32> %a, %b
  store <2 x i32> %c, ptr %x
  ret void
}

; i64 vectors should be scalarized
define void @add_v1i64(ptr %x, ptr %y) {
; RV32-LABEL: add_v1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    lw a4, 4(a1)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    add a3, a3, a4
; RV32-NEXT:    add a1, a2, a1
; RV32-NEXT:    sltu a2, a1, a2
; RV32-NEXT:    add a2, a3, a2
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: add_v1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    add a1, a2, a1
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
  %a = load <1 x i64>, ptr %x
  %b = load <1 x i64>, ptr %y
  %c = add <1 x i64> %a, %b
  store <1 x i64> %c, ptr %x
  ret void
}

; This should use LMUL=1.
define void @fadd_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fadd <4 x float> %a, %b
  store <4 x float> %c, ptr %x
  ret void
}

; double vectors should be scalarized
define void @fadd_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fld fa5, 0(a0)
; CHECK-NEXT:    fld fa4, 8(a0)
; CHECK-NEXT:    fld fa3, 0(a1)
; CHECK-NEXT:    fld fa2, 8(a1)
; CHECK-NEXT:    fadd.d fa5, fa5, fa3
; CHECK-NEXT:    fadd.d fa4, fa4, fa2
; CHECK-NEXT:    fsd fa5, 0(a0)
; CHECK-NEXT:    fsd fa4, 8(a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = fadd <2 x double> %a, %b
  store <2 x double> %c, ptr %x
  ret void
}

; This should use LMUL=1 because there are no fractional float LMULs with ELEN=32
define void @fadd_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = load <2 x float>, ptr %y
  %c = fadd <2 x float> %a, %b
  store <2 x float> %c, ptr %x
  ret void
}

; double vectors should be scalarized
define void @fadd_v1f64(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fld fa5, 0(a0)
; CHECK-NEXT:    fld fa4, 0(a1)
; CHECK-NEXT:    fadd.d fa5, fa5, fa4
; CHECK-NEXT:    fsd fa5, 0(a0)
; CHECK-NEXT:    ret
  %a = load <1 x double>, ptr %x
  %b = load <1 x double>, ptr %y
  %c = fadd <1 x double> %a, %b
  store <1 x double> %c, ptr %x
  ret void
}