; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING,RV32
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING,RV32
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING,RV64
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING,RV64
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,RV64
; Check that the default value enables the web folding and
; that it is bigger than 3.
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING,RV64

; Check that the scalable vector add/sub/mul operations are all promoted into their
; vw counterparts when the maximum web size for folding is increased to 3.
; We need the web size to be at least 3 for the folding to happen, because
; %c has 3 uses.
; See https://github.com/llvm/llvm-project/pull/72340
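; For example, in the i8->i16 case below, FOLDING keeps the operands at e8 and uses
; vwmul.vv/vwadd.vv/vwsub.vv to produce the widened i16 results directly, whereas
; NO_FOLDING materializes the extensions with vsext.vf2 and then uses
; vmul.vv/vadd.vv/vsub.vv at e16.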

define <vscale x 2 x i16> @vwop_vscale_sext_i8i16_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i8i16_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
; NO_FOLDING-NEXT:    vle8.v v8, (a0)
; NO_FOLDING-NEXT:    vle8.v v9, (a1)
; NO_FOLDING-NEXT:    vle8.v v10, (a2)
; NO_FOLDING-NEXT:    vsext.vf2 v11, v8
; NO_FOLDING-NEXT:    vsext.vf2 v8, v9
; NO_FOLDING-NEXT:    vsext.vf2 v9, v10
; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_sext_i8i16_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; FOLDING-NEXT:    vle8.v v8, (a0)
; FOLDING-NEXT:    vle8.v v9, (a1)
; FOLDING-NEXT:    vle8.v v10, (a2)
; FOLDING-NEXT:    vwmul.vv v11, v8, v9
; FOLDING-NEXT:    vwadd.vv v9, v8, v10
; FOLDING-NEXT:    vwsub.vv v12, v8, v10
; FOLDING-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; FOLDING-NEXT:    vor.vv v8, v11, v9
; FOLDING-NEXT:    vor.vv v8, v8, v12
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i8>, ptr %x
  %b = load <vscale x 2 x i8>, ptr %y
  %b2 = load <vscale x 2 x i8>, ptr %z
  %c = sext <vscale x 2 x i8> %a to <vscale x 2 x i16>
  %d = sext <vscale x 2 x i8> %b to <vscale x 2 x i16>
  %d2 = sext <vscale x 2 x i8> %b2 to <vscale x 2 x i16>
  %e = mul <vscale x 2 x i16> %c, %d
  %f = add <vscale x 2 x i16> %c, %d2
  %g = sub <vscale x 2 x i16> %c, %d2
  %h = or <vscale x 2 x i16> %e, %f
  %i = or <vscale x 2 x i16> %h, %g
  ret <vscale x 2 x i16> %i
}

define <vscale x 2 x i32> @vwop_vscale_sext_i16i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i16i32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; NO_FOLDING-NEXT:    vle16.v v8, (a0)
; NO_FOLDING-NEXT:    vle16.v v9, (a1)
; NO_FOLDING-NEXT:    vle16.v v10, (a2)
; NO_FOLDING-NEXT:    vsext.vf2 v11, v8
; NO_FOLDING-NEXT:    vsext.vf2 v8, v9
; NO_FOLDING-NEXT:    vsext.vf2 v9, v10
; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_sext_i16i32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT:    vle16.v v8, (a0)
; FOLDING-NEXT:    vle16.v v9, (a1)
; FOLDING-NEXT:    vle16.v v10, (a2)
; FOLDING-NEXT:    vwmul.vv v11, v8, v9
; FOLDING-NEXT:    vwadd.vv v9, v8, v10
; FOLDING-NEXT:    vwsub.vv v12, v8, v10
; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; FOLDING-NEXT:    vor.vv v8, v11, v9
; FOLDING-NEXT:    vor.vv v8, v8, v12
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i16>, ptr %x
  %b = load <vscale x 2 x i16>, ptr %y
  %b2 = load <vscale x 2 x i16>, ptr %z
  %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
  %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
  %d2 = sext <vscale x 2 x i16> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i64> @vwop_vscale_sext_i32i64_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i32i64_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vl1re32.v v8, (a0)
; NO_FOLDING-NEXT:    vl1re32.v v9, (a1)
; NO_FOLDING-NEXT:    vl1re32.v v10, (a2)
; NO_FOLDING-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; NO_FOLDING-NEXT:    vsext.vf2 v12, v8
; NO_FOLDING-NEXT:    vsext.vf2 v14, v9
; NO_FOLDING-NEXT:    vsext.vf2 v8, v10
; NO_FOLDING-NEXT:    vmul.vv v10, v12, v14
; NO_FOLDING-NEXT:    vadd.vv v14, v12, v8
; NO_FOLDING-NEXT:    vsub.vv v8, v12, v8
; NO_FOLDING-NEXT:    vor.vv v10, v10, v14
; NO_FOLDING-NEXT:    vor.vv v8, v10, v8
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_sext_i32i64_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vl1re32.v v8, (a0)
; FOLDING-NEXT:    vl1re32.v v9, (a1)
; FOLDING-NEXT:    vl1re32.v v10, (a2)
; FOLDING-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; FOLDING-NEXT:    vwmul.vv v12, v8, v9
; FOLDING-NEXT:    vwadd.vv v14, v8, v10
; FOLDING-NEXT:    vwsub.vv v16, v8, v10
; FOLDING-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; FOLDING-NEXT:    vor.vv v8, v12, v14
; FOLDING-NEXT:    vor.vv v8, v8, v16
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i32>, ptr %x
  %b = load <vscale x 2 x i32>, ptr %y
  %b2 = load <vscale x 2 x i32>, ptr %z
  %c = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
  %d = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
  %d2 = sext <vscale x 2 x i32> %b2 to <vscale x 2 x i64>
  %e = mul <vscale x 2 x i64> %c, %d
  %f = add <vscale x 2 x i64> %c, %d2
  %g = sub <vscale x 2 x i64> %c, %d2
  %h = or <vscale x 2 x i64> %e, %f
  %i = or <vscale x 2 x i64> %h, %g
  ret <vscale x 2 x i64> %i
}

define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
; NO_FOLDING-NEXT:    vlm.v v8, (a0)
; NO_FOLDING-NEXT:    vlm.v v9, (a1)
; NO_FOLDING-NEXT:    vlm.v v10, (a2)
; NO_FOLDING-NEXT:    vmv.v.i v11, 0
; NO_FOLDING-NEXT:    li a0, 1
; NO_FOLDING-NEXT:    vmv.v.v v0, v8
; NO_FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
; NO_FOLDING-NEXT:    vmv.v.v v0, v9
; NO_FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
; NO_FOLDING-NEXT:    vmv.v.v v0, v10
; NO_FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
; NO_FOLDING-NEXT:    vmul.vv v9, v12, v9
; NO_FOLDING-NEXT:    vsub.vv v11, v12, v10
; NO_FOLDING-NEXT:    vmv.v.v v0, v8
; NO_FOLDING-NEXT:    vsub.vx v10, v10, a0, v0.t
; NO_FOLDING-NEXT:    vor.vv v8, v9, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v11
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
; FOLDING-NEXT:    vlm.v v8, (a0)
; FOLDING-NEXT:    vlm.v v9, (a1)
; FOLDING-NEXT:    vlm.v v10, (a2)
; FOLDING-NEXT:    vmv.v.i v11, 0
; FOLDING-NEXT:    li a0, 1
; FOLDING-NEXT:    vmv.v.v v0, v8
; FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
; FOLDING-NEXT:    vmv.v.v v0, v9
; FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
; FOLDING-NEXT:    vmv.v.v v0, v10
; FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
; FOLDING-NEXT:    vmul.vv v9, v12, v9
; FOLDING-NEXT:    vsub.vv v11, v12, v10
; FOLDING-NEXT:    vmv.v.v v0, v8
; FOLDING-NEXT:    vsub.vx v10, v10, a0, v0.t
; FOLDING-NEXT:    vor.vv v8, v9, v10
; FOLDING-NEXT:    vor.vv v8, v8, v11
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i1>, ptr %x
  %b = load <vscale x 2 x i1>, ptr %y
  %b2 = load <vscale x 2 x i1>, ptr %z
  %c = sext <vscale x 2 x i1> %a to <vscale x 2 x i32>
  %d = sext <vscale x 2 x i1> %b to <vscale x 2 x i32>
  %d2 = sext <vscale x 2 x i1> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
; NO_FOLDING-NEXT:    vlm.v v8, (a0)
; NO_FOLDING-NEXT:    vlm.v v9, (a1)
; NO_FOLDING-NEXT:    vlm.v v10, (a2)
; NO_FOLDING-NEXT:    vmv.v.i v11, 0
; NO_FOLDING-NEXT:    li a0, 1
; NO_FOLDING-NEXT:    vmv1r.v v0, v8
; NO_FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
; NO_FOLDING-NEXT:    vmv1r.v v0, v9
; NO_FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
; NO_FOLDING-NEXT:    vmv1r.v v0, v10
; NO_FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
; NO_FOLDING-NEXT:    vmul.vv v9, v12, v9
; NO_FOLDING-NEXT:    vsub.vv v11, v12, v10
; NO_FOLDING-NEXT:    vmv1r.v v0, v8
; NO_FOLDING-NEXT:    vsub.vx v10, v10, a0, v0.t
; NO_FOLDING-NEXT:    vor.vv v8, v9, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v11
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
; FOLDING-NEXT:    vlm.v v8, (a0)
; FOLDING-NEXT:    vlm.v v9, (a1)
; FOLDING-NEXT:    vlm.v v10, (a2)
; FOLDING-NEXT:    vmv.v.i v11, 0
; FOLDING-NEXT:    li a0, 1
; FOLDING-NEXT:    vmv1r.v v0, v8
; FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
; FOLDING-NEXT:    vmv1r.v v0, v9
; FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
; FOLDING-NEXT:    vmv1r.v v0, v10
; FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
; FOLDING-NEXT:    vmul.vv v9, v12, v9
; FOLDING-NEXT:    vsub.vv v11, v12, v10
; FOLDING-NEXT:    vmv1r.v v0, v8
; FOLDING-NEXT:    vsub.vx v10, v10, a0, v0.t
; FOLDING-NEXT:    vor.vv v8, v9, v10
; FOLDING-NEXT:    vor.vv v8, v8, v11
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i1>, ptr %x
  %b = load <vscale x 2 x i1>, ptr %y
  %b2 = load <vscale x 2 x i1>, ptr %z
  %c = sext <vscale x 2 x i1> %a to <vscale x 2 x i8>
  %d = sext <vscale x 2 x i1> %b to <vscale x 2 x i8>
  %d2 = sext <vscale x 2 x i1> %b2 to <vscale x 2 x i8>
  %e = mul <vscale x 2 x i8> %c, %d
  %f = add <vscale x 2 x i8> %c, %d2
  %g = sub <vscale x 2 x i8> %c, %d2
  %h = or <vscale x 2 x i8> %e, %f
  %i = or <vscale x 2 x i8> %h, %g
  ret <vscale x 2 x i8> %i
}

define <vscale x 2 x i32> @vwop_vscale_sext_i8i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i8i32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; NO_FOLDING-NEXT:    vle8.v v8, (a0)
; NO_FOLDING-NEXT:    vle8.v v9, (a1)
; NO_FOLDING-NEXT:    vle8.v v10, (a2)
; NO_FOLDING-NEXT:    vsext.vf4 v11, v8
; NO_FOLDING-NEXT:    vsext.vf4 v8, v9
; NO_FOLDING-NEXT:    vsext.vf4 v9, v10
; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_sext_i8i32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT:    vle8.v v8, (a0)
; FOLDING-NEXT:    vle8.v v9, (a1)
; FOLDING-NEXT:    vle8.v v10, (a2)
; FOLDING-NEXT:    vsext.vf2 v11, v8
; FOLDING-NEXT:    vsext.vf2 v8, v9
; FOLDING-NEXT:    vsext.vf2 v9, v10
; FOLDING-NEXT:    vwmul.vv v10, v11, v8
; FOLDING-NEXT:    vwadd.vv v8, v11, v9
; FOLDING-NEXT:    vwsub.vv v12, v11, v9
; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; FOLDING-NEXT:    vor.vv v8, v10, v8
; FOLDING-NEXT:    vor.vv v8, v8, v12
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i8>, ptr %x
  %b = load <vscale x 2 x i8>, ptr %y
  %b2 = load <vscale x 2 x i8>, ptr %z
  %c = sext <vscale x 2 x i8> %a to <vscale x 2 x i32>
  %d = sext <vscale x 2 x i8> %b to <vscale x 2 x i32>
  %d2 = sext <vscale x 2 x i8> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i16> @vwop_vscale_zext_i8i16_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i8i16_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
; NO_FOLDING-NEXT:    vle8.v v8, (a0)
; NO_FOLDING-NEXT:    vle8.v v9, (a1)
; NO_FOLDING-NEXT:    vle8.v v10, (a2)
; NO_FOLDING-NEXT:    vzext.vf2 v11, v8
; NO_FOLDING-NEXT:    vzext.vf2 v8, v9
; NO_FOLDING-NEXT:    vzext.vf2 v9, v10
; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_zext_i8i16_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; FOLDING-NEXT:    vle8.v v8, (a0)
; FOLDING-NEXT:    vle8.v v9, (a1)
; FOLDING-NEXT:    vle8.v v10, (a2)
; FOLDING-NEXT:    vwmulu.vv v11, v8, v9
; FOLDING-NEXT:    vwaddu.vv v9, v8, v10
; FOLDING-NEXT:    vwsubu.vv v12, v8, v10
; FOLDING-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; FOLDING-NEXT:    vor.vv v8, v11, v9
; FOLDING-NEXT:    vor.vv v8, v8, v12
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i8>, ptr %x
  %b = load <vscale x 2 x i8>, ptr %y
  %b2 = load <vscale x 2 x i8>, ptr %z
  %c = zext <vscale x 2 x i8> %a to <vscale x 2 x i16>
  %d = zext <vscale x 2 x i8> %b to <vscale x 2 x i16>
  %d2 = zext <vscale x 2 x i8> %b2 to <vscale x 2 x i16>
  %e = mul <vscale x 2 x i16> %c, %d
  %f = add <vscale x 2 x i16> %c, %d2
  %g = sub <vscale x 2 x i16> %c, %d2
  %h = or <vscale x 2 x i16> %e, %f
  %i = or <vscale x 2 x i16> %h, %g
  ret <vscale x 2 x i16> %i
}

define <vscale x 2 x i32> @vwop_vscale_zext_i16i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i16i32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; NO_FOLDING-NEXT:    vle16.v v8, (a0)
; NO_FOLDING-NEXT:    vle16.v v9, (a1)
; NO_FOLDING-NEXT:    vle16.v v10, (a2)
; NO_FOLDING-NEXT:    vzext.vf2 v11, v8
; NO_FOLDING-NEXT:    vzext.vf2 v8, v9
; NO_FOLDING-NEXT:    vzext.vf2 v9, v10
; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_zext_i16i32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT:    vle16.v v8, (a0)
; FOLDING-NEXT:    vle16.v v9, (a1)
; FOLDING-NEXT:    vle16.v v10, (a2)
; FOLDING-NEXT:    vwmulu.vv v11, v8, v9
; FOLDING-NEXT:    vwaddu.vv v9, v8, v10
; FOLDING-NEXT:    vwsubu.vv v12, v8, v10
; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; FOLDING-NEXT:    vor.vv v8, v11, v9
; FOLDING-NEXT:    vor.vv v8, v8, v12
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i16>, ptr %x
  %b = load <vscale x 2 x i16>, ptr %y
  %b2 = load <vscale x 2 x i16>, ptr %z
  %c = zext <vscale x 2 x i16> %a to <vscale x 2 x i32>
  %d = zext <vscale x 2 x i16> %b to <vscale x 2 x i32>
  %d2 = zext <vscale x 2 x i16> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i64> @vwop_vscale_zext_i32i64_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i32i64_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vl1re32.v v8, (a0)
; NO_FOLDING-NEXT:    vl1re32.v v9, (a1)
; NO_FOLDING-NEXT:    vl1re32.v v10, (a2)
; NO_FOLDING-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; NO_FOLDING-NEXT:    vzext.vf2 v12, v8
; NO_FOLDING-NEXT:    vzext.vf2 v14, v9
; NO_FOLDING-NEXT:    vzext.vf2 v8, v10
; NO_FOLDING-NEXT:    vmul.vv v10, v12, v14
; NO_FOLDING-NEXT:    vadd.vv v14, v12, v8
; NO_FOLDING-NEXT:    vsub.vv v8, v12, v8
; NO_FOLDING-NEXT:    vor.vv v10, v10, v14
; NO_FOLDING-NEXT:    vor.vv v8, v10, v8
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_zext_i32i64_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vl1re32.v v8, (a0)
; FOLDING-NEXT:    vl1re32.v v9, (a1)
; FOLDING-NEXT:    vl1re32.v v10, (a2)
; FOLDING-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; FOLDING-NEXT:    vwmulu.vv v12, v8, v9
; FOLDING-NEXT:    vwaddu.vv v14, v8, v10
; FOLDING-NEXT:    vwsubu.vv v16, v8, v10
; FOLDING-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; FOLDING-NEXT:    vor.vv v8, v12, v14
; FOLDING-NEXT:    vor.vv v8, v8, v16
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i32>, ptr %x
  %b = load <vscale x 2 x i32>, ptr %y
  %b2 = load <vscale x 2 x i32>, ptr %z
  %c = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
  %d = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
  %d2 = zext <vscale x 2 x i32> %b2 to <vscale x 2 x i64>
  %e = mul <vscale x 2 x i64> %c, %d
  %f = add <vscale x 2 x i64> %c, %d2
  %g = sub <vscale x 2 x i64> %c, %d2
  %h = or <vscale x 2 x i64> %e, %f
  %i = or <vscale x 2 x i64> %h, %g
  ret <vscale x 2 x i64> %i
}

define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
; NO_FOLDING-NEXT:    vlm.v v0, (a0)
; NO_FOLDING-NEXT:    vlm.v v8, (a2)
; NO_FOLDING-NEXT:    vlm.v v9, (a1)
; NO_FOLDING-NEXT:    vmv.v.i v10, 0
; NO_FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
; NO_FOLDING-NEXT:    vmv.v.v v0, v8
; NO_FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v8
; NO_FOLDING-NEXT:    vsub.vv v8, v11, v8
; NO_FOLDING-NEXT:    vmv.v.v v0, v9
; NO_FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
; NO_FOLDING-NEXT:    vor.vv v8, v10, v8
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
; FOLDING-NEXT:    vlm.v v0, (a0)
; FOLDING-NEXT:    vlm.v v8, (a2)
; FOLDING-NEXT:    vlm.v v9, (a1)
; FOLDING-NEXT:    vmv.v.i v10, 0
; FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
; FOLDING-NEXT:    vmv.v.v v0, v8
; FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
; FOLDING-NEXT:    vadd.vv v10, v11, v8
; FOLDING-NEXT:    vsub.vv v8, v11, v8
; FOLDING-NEXT:    vmv.v.v v0, v9
; FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
; FOLDING-NEXT:    vor.vv v8, v10, v8
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i1>, ptr %x
  %b = load <vscale x 2 x i1>, ptr %y
  %b2 = load <vscale x 2 x i1>, ptr %z
  %c = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
  %d = zext <vscale x 2 x i1> %b to <vscale x 2 x i32>
  %d2 = zext <vscale x 2 x i1> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i8> @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
; NO_FOLDING-NEXT:    vlm.v v0, (a0)
; NO_FOLDING-NEXT:    vlm.v v8, (a2)
; NO_FOLDING-NEXT:    vlm.v v9, (a1)
; NO_FOLDING-NEXT:    vmv.v.i v10, 0
; NO_FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
; NO_FOLDING-NEXT:    vmv1r.v v0, v8
; NO_FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v8
; NO_FOLDING-NEXT:    vsub.vv v8, v11, v8
; NO_FOLDING-NEXT:    vmv1r.v v0, v9
; NO_FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
; NO_FOLDING-NEXT:    vor.vv v8, v10, v8
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
; FOLDING-NEXT:    vlm.v v0, (a0)
; FOLDING-NEXT:    vlm.v v8, (a2)
; FOLDING-NEXT:    vlm.v v9, (a1)
; FOLDING-NEXT:    vmv.v.i v10, 0
; FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
; FOLDING-NEXT:    vmv1r.v v0, v8
; FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
; FOLDING-NEXT:    vadd.vv v10, v11, v8
; FOLDING-NEXT:    vsub.vv v8, v11, v8
; FOLDING-NEXT:    vmv1r.v v0, v9
; FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
; FOLDING-NEXT:    vor.vv v8, v10, v8
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i1>, ptr %x
  %b = load <vscale x 2 x i1>, ptr %y
  %b2 = load <vscale x 2 x i1>, ptr %z
  %c = zext <vscale x 2 x i1> %a to <vscale x 2 x i8>
  %d = zext <vscale x 2 x i1> %b to <vscale x 2 x i8>
  %d2 = zext <vscale x 2 x i1> %b2 to <vscale x 2 x i8>
  %e = mul <vscale x 2 x i8> %c, %d
  %f = add <vscale x 2 x i8> %c, %d2
  %g = sub <vscale x 2 x i8> %c, %d2
  %h = or <vscale x 2 x i8> %e, %f
  %i = or <vscale x 2 x i8> %h, %g
  ret <vscale x 2 x i8> %i
}

define <vscale x 2 x i32> @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i8i32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; NO_FOLDING-NEXT:    vle8.v v8, (a0)
; NO_FOLDING-NEXT:    vle8.v v9, (a1)
; NO_FOLDING-NEXT:    vle8.v v10, (a2)
; NO_FOLDING-NEXT:    vzext.vf4 v11, v8
; NO_FOLDING-NEXT:    vzext.vf4 v8, v9
; NO_FOLDING-NEXT:    vzext.vf4 v9, v10
; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_zext_i8i32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT:    vle8.v v8, (a0)
; FOLDING-NEXT:    vle8.v v9, (a1)
; FOLDING-NEXT:    vle8.v v10, (a2)
; FOLDING-NEXT:    vzext.vf2 v11, v8
; FOLDING-NEXT:    vzext.vf2 v8, v9
; FOLDING-NEXT:    vzext.vf2 v9, v10
; FOLDING-NEXT:    vwmulu.vv v10, v11, v8
; FOLDING-NEXT:    vwaddu.vv v8, v11, v9
; FOLDING-NEXT:    vwsubu.vv v12, v11, v9
; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; FOLDING-NEXT:    vor.vv v8, v10, v8
; FOLDING-NEXT:    vor.vv v8, v8, v12
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i8>, ptr %x
  %b = load <vscale x 2 x i8>, ptr %y
  %b2 = load <vscale x 2 x i8>, ptr %z
  %c = zext <vscale x 2 x i8> %a to <vscale x 2 x i32>
  %d = zext <vscale x 2 x i8> %b to <vscale x 2 x i32>
  %d2 = zext <vscale x 2 x i8> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}



;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}