; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
; RUN:   -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
; RUN:   -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -riscv-enable-vl-optimizer \
; RUN:   -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -riscv-enable-vl-optimizer \
; RUN:   -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT

declare <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, iXLen)

define <vscale x 4 x i32> @different_imm_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; NOVLOPT-LABEL: different_imm_vl_with_ta:
; NOVLOPT:       # %bb.0:
; NOVLOPT-NEXT:    vsetivli zero, 5, e32, m2, ta, ma
; NOVLOPT-NEXT:    vadd.vv v8, v10, v12
; NOVLOPT-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
; NOVLOPT-NEXT:    ret
;
; VLOPT-LABEL: different_imm_vl_with_ta:
; VLOPT:       # %bb.0:
; VLOPT-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; VLOPT-NEXT:    vadd.vv v8, v10, v12
; VLOPT-NEXT:    vadd.vv v8, v8, v10
; VLOPT-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
  ret <vscale x 4 x i32> %w
}

define <vscale x 4 x i32> @vlmax_and_imm_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; NOVLOPT-LABEL: vlmax_and_imm_vl_with_ta:
; NOVLOPT:       # %bb.0:
; NOVLOPT-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; NOVLOPT-NEXT:    vadd.vv v8, v10, v12
; NOVLOPT-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
; NOVLOPT-NEXT:    ret
;
; VLOPT-LABEL: vlmax_and_imm_vl_with_ta:
; VLOPT:       # %bb.0:
; VLOPT-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; VLOPT-NEXT:    vadd.vv v8, v10, v12
; VLOPT-NEXT:    vadd.vv v8, v8, v10
; VLOPT-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
  ret <vscale x 4 x i32> %w
}

; Not beneficial to propagate VL since VL is larger on the use side.
define <vscale x 4 x i32> @different_imm_vl_with_ta_larger_vl(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_with_ta_larger_vl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v12
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 5)
  ret <vscale x 4 x i32> %w
}

define <vscale x 4 x i32> @different_imm_reg_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_reg_vl_with_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v12
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl1)
  ret <vscale x 4 x i32> %w
}

; Not beneficial to propagate VL since VL is already one.
define <vscale x 4 x i32> @different_imm_vl_with_ta_1(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_with_ta_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v12
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl1)
  ret <vscale x 4 x i32> %w
}

; Propagate %vl2 to the last instruction since it may be smaller than %vl1.
; This is still safe even if %vl2 is larger than %vl1, because the rest of the
; vector is undefined.
define <vscale x 4 x i32> @different_vl_with_ta(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_vl_with_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v10, v8, v10
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl2)
  ret <vscale x 4 x i32> %w
}

; We can propagate VL to a tail-undisturbed policy, provided none of its users
; are passthrus (i.e. read past VL).
define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_vl_with_tu:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT:    vmv2r.v v14, v10
; CHECK-NEXT:    vadd.vv v14, v10, v12
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
; CHECK-NEXT:    vadd.vv v8, v14, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl2)
  ret <vscale x 4 x i32> %w
}

; We can propagate VL to a tail-undisturbed policy, provided none of its users
; are passthrus (i.e. read past VL).
define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; NOVLOPT-LABEL: different_imm_vl_with_tu:
; NOVLOPT:       # %bb.0:
; NOVLOPT-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; NOVLOPT-NEXT:    vmv2r.v v14, v10
; NOVLOPT-NEXT:    vadd.vv v14, v10, v12
; NOVLOPT-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
; NOVLOPT-NEXT:    vadd.vv v8, v14, v10
; NOVLOPT-NEXT:    ret
;
; VLOPT-LABEL: different_imm_vl_with_tu:
; VLOPT:       # %bb.0:
; VLOPT-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
; VLOPT-NEXT:    vmv2r.v v14, v10
; VLOPT-NEXT:    vadd.vv v14, v10, v12
; VLOPT-NEXT:    vadd.vv v8, v14, v10
; VLOPT-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
  ret <vscale x 4 x i32> %w
}

; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
; are demanded.
define <vscale x 4 x i32> @different_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_vl_as_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT:    vmv2r.v v12, v8
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl2)
  ret <vscale x 4 x i32> %w
}

; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
; are demanded.
define <vscale x 4 x i32> @different_imm_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_as_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vmv2r.v v12, v8
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
  ret <vscale x 4 x i32> %w
}

define <vscale x 4 x i32> @dont_optimize_tied_def(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
; CHECK-LABEL: dont_optimize_tied_def:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
; CHECK-NEXT:    vwmacc.vv v8, v10, v11
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT:    vwmacc.vv v8, v10, v11
; CHECK-NEXT:    ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %1, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl, iXLen 0)
  ret <vscale x 4 x i32> %2
}