; xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll (revision 97982a8c605fac7c86d02e641a6cd7898b3ca343)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

; VP intrinsic declarations used by the <2 x i8> tests. The "nxv" suffix in
; the names does not match the fixed <2 x i8> types; LLVM remangles on load.
declare <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
declare <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
declare <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
declare <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)

; Masked vv form: vp.mul + vp.add + vp.merge(%m) fold to masked vmacc.vv (tu, mu).
define <2 x i8> @vmacc_vv_nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
  ret <2 x i8> %u
}

; All-ones merge mask: folds to unmasked vmacc.vv (%m parameter is unused).
define <2 x i8> @vmacc_vv_nxv2i8_unmasked(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> splat (i1 -1), <2 x i8> %y, <2 x i8> %c, i32 %evl)
  ret <2 x i8> %u
}

; Scalar-splat multiplicand: folds to masked vmacc.vx.
define <2 x i8> @vmacc_vx_nxv2i8(<2 x i8> %a, i8 %b, <2 x i8> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
  ret <2 x i8> %u
}

; Scalar splat with all-ones merge mask: unmasked vmacc.vx.
define <2 x i8> @vmacc_vx_nxv2i8_unmasked(<2 x i8> %a, i8 %b, <2 x i8> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, ma
; CHECK-NEXT:    vmacc.vx v9, a0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> splat (i1 -1), <2 x i8> %y, <2 x i8> %c, i32 %evl)
  ret <2 x i8> %u
}

; vp.select instead of vp.merge: tail-agnostic ("ta") masked vmacc.vv.
define <2 x i8> @vmacc_vv_nxv2i8_ta(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
  ret <2 x i8> %u
}

; vp.select + scalar splat: tail-agnostic masked vmacc.vx.
define <2 x i8> @vmacc_vx_nxv2i8_ta(<2 x i8> %a, i8 %b, <2 x i8> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv2i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
  ret <2 x i8> %u
}

; VP intrinsic declarations used by the <4 x i8> tests.
declare <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
declare <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
declare <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32)
declare <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32)

; Masked vv form: vp.mul + vp.add + vp.merge(%m) fold to masked vmacc.vv (tu, mu).
define <4 x i8> @vmacc_vv_nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
  ret <4 x i8> %u
}

; All-ones merge mask: folds to unmasked vmacc.vv (%m parameter is unused).
define <4 x i8> @vmacc_vv_nxv4i8_unmasked(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> splat (i1 -1), <4 x i8> %y, <4 x i8> %c, i32 %evl)
  ret <4 x i8> %u
}

; Scalar-splat multiplicand: folds to masked vmacc.vx.
define <4 x i8> @vmacc_vx_nxv4i8(<4 x i8> %a, i8 %b, <4 x i8> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
  ret <4 x i8> %u
}

; Scalar splat with all-ones merge mask: unmasked vmacc.vx.
define <4 x i8> @vmacc_vx_nxv4i8_unmasked(<4 x i8> %a, i8 %b, <4 x i8> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, ma
; CHECK-NEXT:    vmacc.vx v9, a0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> splat (i1 -1), <4 x i8> %y, <4 x i8> %c, i32 %evl)
  ret <4 x i8> %u
}

; vp.select instead of vp.merge: tail-agnostic ("ta") masked vmacc.vv.
define <4 x i8> @vmacc_vv_nxv4i8_ta(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
  ret <4 x i8> %u
}

; vp.select + scalar splat: tail-agnostic masked vmacc.vx.
define <4 x i8> @vmacc_vx_nxv4i8_ta(<4 x i8> %a, i8 %b, <4 x i8> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv4i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
  ret <4 x i8> %u
}

; VP intrinsic declarations used by the <8 x i8> tests.
declare <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
declare <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
declare <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32)
declare <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32)

; Masked vv form: vp.mul + vp.add + vp.merge(%m) fold to masked vmacc.vv (tu, mu).
define <8 x i8> @vmacc_vv_nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
  ret <8 x i8> %u
}

; All-ones merge mask: folds to unmasked vmacc.vv (%m parameter is unused).
define <8 x i8> @vmacc_vv_nxv8i8_unmasked(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> splat (i1 -1), <8 x i8> %y, <8 x i8> %c, i32 %evl)
  ret <8 x i8> %u
}

; Scalar-splat multiplicand: folds to masked vmacc.vx.
define <8 x i8> @vmacc_vx_nxv8i8(<8 x i8> %a, i8 %b, <8 x i8> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
  ret <8 x i8> %u
}

; Scalar splat with all-ones merge mask: unmasked vmacc.vx.
define <8 x i8> @vmacc_vx_nxv8i8_unmasked(<8 x i8> %a, i8 %b, <8 x i8> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, ma
; CHECK-NEXT:    vmacc.vx v9, a0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> splat (i1 -1), <8 x i8> %y, <8 x i8> %c, i32 %evl)
  ret <8 x i8> %u
}

; vp.select instead of vp.merge: tail-agnostic ("ta") masked vmacc.vv.
define <8 x i8> @vmacc_vv_nxv8i8_ta(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
  ret <8 x i8> %u
}

; vp.select + scalar splat: tail-agnostic masked vmacc.vx.
define <8 x i8> @vmacc_vx_nxv8i8_ta(<8 x i8> %a, i8 %b, <8 x i8> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv8i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
  ret <8 x i8> %u
}

; VP intrinsic declarations used by the <16 x i8> tests.
declare <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
declare <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
declare <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32)
declare <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32)

; Masked vv form at LMUL=1 (e8, m1): folds to masked vmacc.vv (tu, mu).
define <16 x i8> @vmacc_vv_nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
  ret <16 x i8> %u
}

; All-ones merge mask: folds to unmasked vmacc.vv (%m parameter is unused).
define <16 x i8> @vmacc_vv_nxv16i8_unmasked(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> splat (i1 -1), <16 x i8> %y, <16 x i8> %c, i32 %evl)
  ret <16 x i8> %u
}

; Scalar-splat multiplicand: folds to masked vmacc.vx.
define <16 x i8> @vmacc_vx_nxv16i8(<16 x i8> %a, i8 %b, <16 x i8> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
  ret <16 x i8> %u
}

; Scalar splat with all-ones merge mask: unmasked vmacc.vx.
define <16 x i8> @vmacc_vx_nxv16i8_unmasked(<16 x i8> %a, i8 %b, <16 x i8> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, ma
; CHECK-NEXT:    vmacc.vx v9, a0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> splat (i1 -1), <16 x i8> %y, <16 x i8> %c, i32 %evl)
  ret <16 x i8> %u
}

; vp.select: tail-agnostic vmacc.vv; result moved with vmv.v.v (not vmv1r.v).
define <16 x i8> @vmacc_vv_nxv16i8_ta(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv16i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
  ret <16 x i8> %u
}

; vp.select + scalar splat: tail-agnostic vmacc.vx.
define <16 x i8> @vmacc_vx_nxv16i8_ta(<16 x i8> %a, i8 %b, <16 x i8> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
  ret <16 x i8> %u
}

; VP intrinsic declarations used by the <32 x i8> tests.
declare <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8>, <32 x i8>, <32 x i1>, i32)
declare <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8>, <32 x i8>, <32 x i1>, i32)
declare <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1>, <32 x i8>, <32 x i8>, i32)
declare <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1>, <32 x i8>, <32 x i8>, i32)

; Masked vv form at LMUL=2 (e8, m2): register-pair operands, vmv2r.v move.
define <32 x i8> @vmacc_vv_nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c,  <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> splat (i1 -1), i32 %evl)
  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> splat (i1 -1), i32 %evl)
  %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
  ret <32 x i8> %u
}

; All-ones merge mask: folds to unmasked vmacc.vv (%m parameter is unused).
define <32 x i8> @vmacc_vv_nxv32i8_unmasked(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c,  <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv32i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, ma
; CHECK-NEXT:    vmacc.vv v12, v8, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> splat (i1 -1), i32 %evl)
  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> splat (i1 -1), i32 %evl)
  %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> splat (i1 -1), <32 x i8> %y, <32 x i8> %c, i32 %evl)
  ret <32 x i8> %u
}

; Scalar-splat multiplicand: folds to masked vmacc.vx.
define <32 x i8> @vmacc_vx_nxv32i8(<32 x i8> %a, i8 %b, <32 x i8> %c,  <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, tu, mu
; CHECK-NEXT:    vmacc.vx v10, a0, v8, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer
  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> splat (i1 -1), i32 %evl)
  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> splat (i1 -1), i32 %evl)
  %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
  ret <32 x i8> %u
}

; Scalar splat with all-ones merge mask: unmasked vmacc.vx.
define <32 x i8> @vmacc_vx_nxv32i8_unmasked(<32 x i8> %a, i8 %b, <32 x i8> %c,  <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv32i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, tu, ma
; CHECK-NEXT:    vmacc.vx v10, a0, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer
  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> splat (i1 -1), i32 %evl)
  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> splat (i1 -1), i32 %evl)
  %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> splat (i1 -1), <32 x i8> %y, <32 x i8> %c, i32 %evl)
  ret <32 x i8> %u
}

; vp.select: tail-agnostic vmacc.vv at LMUL=2.
define <32 x i8> @vmacc_vv_nxv32i8_ta(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c,  <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv32i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> splat (i1 -1), i32 %evl)
  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> splat (i1 -1), i32 %evl)
  %u = call <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
  ret <32 x i8> %u
}

; vp.select + scalar splat: tail-agnostic vmacc.vx at LMUL=2.
define <32 x i8> @vmacc_vx_nxv32i8_ta(<32 x i8> %a, i8 %b, <32 x i8> %c,  <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv32i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
; CHECK-NEXT:    vmacc.vx v10, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer
  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> splat (i1 -1), i32 %evl)
  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> splat (i1 -1), i32 %evl)
  %u = call <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
  ret <32 x i8> %u
}

; VP intrinsic declarations used by the <64 x i8> tests.
declare <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8>, <64 x i8>, <64 x i1>, i32)
declare <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8>, <64 x i8>, <64 x i1>, i32)
declare <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1>, <64 x i8>, <64 x i8>, i32)
declare <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1>, <64 x i8>, <64 x i8>, i32)

; Masked vv form at LMUL=4 (e8, m4): register-quad operands, vmv4r.v move.
define <64 x i8> @vmacc_vv_nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c,  <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    ret
  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> splat (i1 -1), i32 %evl)
  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> splat (i1 -1), i32 %evl)
  %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
  ret <64 x i8> %u
}

; All-ones merge mask: folds to unmasked vmacc.vv (%m parameter is unused).
define <64 x i8> @vmacc_vv_nxv64i8_unmasked(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c,  <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv64i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, ma
; CHECK-NEXT:    vmacc.vv v16, v8, v12
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    ret
  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> splat (i1 -1), i32 %evl)
  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> splat (i1 -1), i32 %evl)
  %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> splat (i1 -1), <64 x i8> %y, <64 x i8> %c, i32 %evl)
  ret <64 x i8> %u
}

; Scalar-splat multiplicand: folds to masked vmacc.vx.
define <64 x i8> @vmacc_vx_nxv64i8(<64 x i8> %a, i8 %b, <64 x i8> %c,  <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m4, tu, mu
; CHECK-NEXT:    vmacc.vx v12, a0, v8, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer
  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> splat (i1 -1), i32 %evl)
  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> splat (i1 -1), i32 %evl)
  %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
  ret <64 x i8> %u
}

; Scalar splat with all-ones merge mask: unmasked vmacc.vx.
define <64 x i8> @vmacc_vx_nxv64i8_unmasked(<64 x i8> %a, i8 %b, <64 x i8> %c,  <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv64i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m4, tu, ma
; CHECK-NEXT:    vmacc.vx v12, a0, v8
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer
  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> splat (i1 -1), i32 %evl)
  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> splat (i1 -1), i32 %evl)
  %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> splat (i1 -1), <64 x i8> %y, <64 x i8> %c, i32 %evl)
  ret <64 x i8> %u
}

; vp.select: tail-agnostic vmacc.vv at LMUL=4.
define <64 x i8> @vmacc_vv_nxv64i8_ta(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c,  <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv64i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> splat (i1 -1), i32 %evl)
  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> splat (i1 -1), i32 %evl)
  %u = call <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
  ret <64 x i8> %u
}

; vp.select + scalar splat: tail-agnostic vmacc.vx at LMUL=4.
define <64 x i8> @vmacc_vx_nxv64i8_ta(<64 x i8> %a, i8 %b, <64 x i8> %c,  <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv64i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
; CHECK-NEXT:    vmacc.vx v12, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer
  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> splat (i1 -1), i32 %evl)
  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> splat (i1 -1), i32 %evl)
  %u = call <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
  ret <64 x i8> %u
}

; VP intrinsic declarations used by the <2 x i16> tests.
declare <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
declare <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
declare <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32)
declare <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32)

; Masked vv form at e16, mf4: folds to masked vmacc.vv (tu, mu).
define <2 x i16> @vmacc_vv_nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
  ret <2 x i16> %u
}

; All-ones merge mask: folds to unmasked vmacc.vv (%m parameter is unused).
define <2 x i16> @vmacc_vv_nxv2i16_unmasked(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> splat (i1 -1), <2 x i16> %y, <2 x i16> %c, i32 %evl)
  ret <2 x i16> %u
}

; Scalar-splat multiplicand: folds to masked vmacc.vx.
define <2 x i16> @vmacc_vx_nxv2i16(<2 x i16> %a, i16 %b, <2 x i16> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
  ret <2 x i16> %u
}

; Splatted scalar with all-ones merge mask: unmasked vmacc.vx ("tu, ma").
define <2 x i16> @vmacc_vx_nxv2i16_unmasked(<2 x i16> %a, i16 %b, <2 x i16> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
; CHECK-NEXT:    vmacc.vx v9, a0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> splat (i1 -1), <2 x i16> %y, <2 x i16> %c, i32 %evl)
  ret <2 x i16> %u
}
601
; vp.select instead of vp.merge permits a tail-agnostic policy ("ta, mu").
define <2 x i16> @vmacc_vv_nxv2i16_ta(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
  ret <2 x i16> %u
}
614
; vp.select with splatted scalar: masked vmacc.vx under "ta, mu".
define <2 x i16> @vmacc_vx_nxv2i16_ta(<2 x i16> %a, i16 %b, <2 x i16> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv2i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
  ret <2 x i16> %u
}
629
630declare <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
631declare <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
632declare <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32)
633declare <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32)
634
; Masked vmacc.vv fold, 4 x i16 (e16/mf2), "tu, mu" policy.
define <4 x i16> @vmacc_vv_nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
  ret <4 x i16> %u
}
647
; All-ones merge mask: unmasked vmacc.vv, 4 x i16, "tu, ma" policy.
define <4 x i16> @vmacc_vv_nxv4i16_unmasked(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> splat (i1 -1), <4 x i16> %y, <4 x i16> %c, i32 %evl)
  ret <4 x i16> %u
}
660
; Splatted scalar multiplicand: masked vmacc.vx, 4 x i16, "tu, mu" policy.
define <4 x i16> @vmacc_vx_nxv4i16(<4 x i16> %a, i16 %b, <4 x i16> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
  ret <4 x i16> %u
}
675
; Splatted scalar with all-ones merge mask: unmasked vmacc.vx, "tu, ma".
define <4 x i16> @vmacc_vx_nxv4i16_unmasked(<4 x i16> %a, i16 %b, <4 x i16> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
; CHECK-NEXT:    vmacc.vx v9, a0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> splat (i1 -1), <4 x i16> %y, <4 x i16> %c, i32 %evl)
  ret <4 x i16> %u
}
690
; vp.select instead of vp.merge: masked vmacc.vv under "ta, mu".
define <4 x i16> @vmacc_vv_nxv4i16_ta(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
  ret <4 x i16> %u
}
703
; vp.select with splatted scalar: masked vmacc.vx under "ta, mu".
define <4 x i16> @vmacc_vx_nxv4i16_ta(<4 x i16> %a, i16 %b, <4 x i16> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv4i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
  ret <4 x i16> %u
}
718
719declare <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
720declare <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
721declare <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32)
722declare <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32)
723
; Masked vmacc.vv fold, 8 x i16 (e16/m1), "tu, mu" policy.
define <8 x i16> @vmacc_vv_nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl)
  ret <8 x i16> %u
}
736
; All-ones merge mask: unmasked vmacc.vv, 8 x i16, "tu, ma" policy.
define <8 x i16> @vmacc_vv_nxv8i16_unmasked(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> splat (i1 -1), <8 x i16> %y, <8 x i16> %c, i32 %evl)
  ret <8 x i16> %u
}
749
; Splatted scalar multiplicand: masked vmacc.vx, 8 x i16, "tu, mu" policy.
define <8 x i16> @vmacc_vx_nxv8i16(<8 x i16> %a, i16 %b, <8 x i16> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl)
  ret <8 x i16> %u
}
764
; Splatted scalar with all-ones merge mask: unmasked vmacc.vx, "tu, ma".
define <8 x i16> @vmacc_vx_nxv8i16_unmasked(<8 x i16> %a, i16 %b, <8 x i16> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT:    vmacc.vx v9, a0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> splat (i1 -1), <8 x i16> %y, <8 x i16> %c, i32 %evl)
  ret <8 x i16> %u
}
779
; vp.select: "ta, mu" policy; whole-register vmv.v.v copies the result back.
define <8 x i16> @vmacc_vv_nxv8i16_ta(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl)
  ret <8 x i16> %u
}
792
; vp.select with splatted scalar: masked vmacc.vx under "ta, mu".
define <8 x i16> @vmacc_vx_nxv8i16_ta(<8 x i16> %a, i16 %b, <8 x i16> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv8i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl)
  ret <8 x i16> %u
}
807
808declare <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
809declare <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
810declare <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32)
811declare <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32)
812
; Masked vmacc.vv fold, 16 x i16 (e16/m2, register pairs), "tu, mu" policy.
define <16 x i16> @vmacc_vv_nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl)
  ret <16 x i16> %u
}
825
; All-ones merge mask: unmasked vmacc.vv, 16 x i16, "tu, ma" policy.
define <16 x i16> @vmacc_vv_nxv16i16_unmasked(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
; CHECK-NEXT:    vmacc.vv v12, v8, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> splat (i1 -1), <16 x i16> %y, <16 x i16> %c, i32 %evl)
  ret <16 x i16> %u
}
838
; Splatted scalar multiplicand: masked vmacc.vx, 16 x i16, "tu, mu" policy.
define <16 x i16> @vmacc_vx_nxv16i16(<16 x i16> %a, i16 %b, <16 x i16> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
; CHECK-NEXT:    vmacc.vx v10, a0, v8, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl)
  ret <16 x i16> %u
}
853
; Splatted scalar with all-ones merge mask: unmasked vmacc.vx, "tu, ma".
define <16 x i16> @vmacc_vx_nxv16i16_unmasked(<16 x i16> %a, i16 %b, <16 x i16> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
; CHECK-NEXT:    vmacc.vx v10, a0, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> splat (i1 -1), <16 x i16> %y, <16 x i16> %c, i32 %evl)
  ret <16 x i16> %u
}
868
; vp.select instead of vp.merge: masked vmacc.vv under "ta, mu".
define <16 x i16> @vmacc_vv_nxv16i16_ta(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv16i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl)
  ret <16 x i16> %u
}
881
; vp.select with splatted scalar: masked vmacc.vx under "ta, mu".
define <16 x i16> @vmacc_vx_nxv16i16_ta(<16 x i16> %a, i16 %b, <16 x i16> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT:    vmacc.vx v10, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl)
  ret <16 x i16> %u
}
896
897declare <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16>, <32 x i16>, <32 x i1>, i32)
898declare <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16>, <32 x i16>, <32 x i1>, i32)
899declare <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1>, <32 x i16>, <32 x i16>, i32)
900declare <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1>, <32 x i16>, <32 x i16>, i32)
901
; Masked vmacc.vv fold, 32 x i16 (e16/m4, register quads), "tu, mu" policy.
define <32 x i16> @vmacc_vv_nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c,  <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    ret
  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> splat (i1 -1), i32 %evl)
  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> splat (i1 -1), i32 %evl)
  %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl)
  ret <32 x i16> %u
}
914
; All-ones merge mask: unmasked vmacc.vv, 32 x i16, "tu, ma" policy.
define <32 x i16> @vmacc_vv_nxv32i16_unmasked(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c,  <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv32i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, ma
; CHECK-NEXT:    vmacc.vv v16, v8, v12
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    ret
  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> splat (i1 -1), i32 %evl)
  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> splat (i1 -1), i32 %evl)
  %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> splat (i1 -1), <32 x i16> %y, <32 x i16> %c, i32 %evl)
  ret <32 x i16> %u
}
927
; Splatted scalar multiplicand: masked vmacc.vx, 32 x i16, "tu, mu" policy.
define <32 x i16> @vmacc_vx_nxv32i16(<32 x i16> %a, i16 %b, <32 x i16> %c,  <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
; CHECK-NEXT:    vmacc.vx v12, a0, v8, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer
  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> splat (i1 -1), i32 %evl)
  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> splat (i1 -1), i32 %evl)
  %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl)
  ret <32 x i16> %u
}
942
; Splatted scalar with all-ones merge mask: unmasked vmacc.vx, "tu, ma".
define <32 x i16> @vmacc_vx_nxv32i16_unmasked(<32 x i16> %a, i16 %b, <32 x i16> %c,  <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv32i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
; CHECK-NEXT:    vmacc.vx v12, a0, v8
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer
  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> splat (i1 -1), i32 %evl)
  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> splat (i1 -1), i32 %evl)
  %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> splat (i1 -1), <32 x i16> %y, <32 x i16> %c, i32 %evl)
  ret <32 x i16> %u
}
957
; vp.select instead of vp.merge: masked vmacc.vv under "ta, mu".
define <32 x i16> @vmacc_vv_nxv32i16_ta(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c,  <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv32i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> splat (i1 -1), i32 %evl)
  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> splat (i1 -1), i32 %evl)
  %u = call <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl)
  ret <32 x i16> %u
}
970
; vp.select with splatted scalar: masked vmacc.vx under "ta, mu".
define <32 x i16> @vmacc_vx_nxv32i16_ta(<32 x i16> %a, i16 %b, <32 x i16> %c,  <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv32i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT:    vmacc.vx v12, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer
  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> splat (i1 -1), i32 %evl)
  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> splat (i1 -1), i32 %evl)
  %u = call <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl)
  ret <32 x i16> %u
}
985
986declare <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
987declare <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
988declare <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32)
989declare <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32)
990
; Masked vmacc.vv fold, 2 x i32 (e32/mf2), "tu, mu" policy.
define <2 x i32> @vmacc_vv_nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl)
  ret <2 x i32> %u
}
1003
; All-ones merge mask: unmasked vmacc.vv, 2 x i32, "tu, ma" policy.
define <2 x i32> @vmacc_vv_nxv2i32_unmasked(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> splat (i1 -1), <2 x i32> %y, <2 x i32> %c, i32 %evl)
  ret <2 x i32> %u
}
1016
; Splatted scalar multiplicand: masked vmacc.vx, 2 x i32, "tu, mu" policy.
define <2 x i32> @vmacc_vx_nxv2i32(<2 x i32> %a, i32 %b, <2 x i32> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl)
  ret <2 x i32> %u
}
1031
; Splatted scalar with all-ones merge mask: unmasked vmacc.vx, "tu, ma".
define <2 x i32> @vmacc_vx_nxv2i32_unmasked(<2 x i32> %a, i32 %b, <2 x i32> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, ma
; CHECK-NEXT:    vmacc.vx v9, a0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> splat (i1 -1), <2 x i32> %y, <2 x i32> %c, i32 %evl)
  ret <2 x i32> %u
}
1046
; vp.select instead of vp.merge: masked vmacc.vv under "ta, mu".
define <2 x i32> @vmacc_vv_nxv2i32_ta(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl)
  ret <2 x i32> %u
}
1059
; vp.select with splatted scalar: masked vmacc.vx under "ta, mu".
define <2 x i32> @vmacc_vx_nxv2i32_ta(<2 x i32> %a, i32 %b, <2 x i32> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv2i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl)
  ret <2 x i32> %u
}
1074
1075declare <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
1076declare <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
1077declare <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32)
1078declare <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32)
1079
; Masked vmacc.vv fold, 4 x i32 (e32/m1), "tu, mu" policy.
define <4 x i32> @vmacc_vv_nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl)
  ret <4 x i32> %u
}
1092
; All-ones merge mask: unmasked vmacc.vv, 4 x i32, "tu, ma" policy.
define <4 x i32> @vmacc_vv_nxv4i32_unmasked(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> splat (i1 -1), <4 x i32> %y, <4 x i32> %c, i32 %evl)
  ret <4 x i32> %u
}
1105
; Splatted scalar multiplicand: masked vmacc.vx, 4 x i32, "tu, mu" policy.
define <4 x i32> @vmacc_vx_nxv4i32(<4 x i32> %a, i32 %b, <4 x i32> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl)
  ret <4 x i32> %u
}
1120
; Splatted scalar with all-ones merge mask: unmasked vmacc.vx, "tu, ma".
define <4 x i32> @vmacc_vx_nxv4i32_unmasked(<4 x i32> %a, i32 %b, <4 x i32> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT:    vmacc.vx v9, a0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> splat (i1 -1), <4 x i32> %y, <4 x i32> %c, i32 %evl)
  ret <4 x i32> %u
}
1135
; vp.select instead of vp.merge: masked vmacc.vv under "ta, mu".
define <4 x i32> @vmacc_vv_nxv4i32_ta(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl)
  ret <4 x i32> %u
}
1148
; vp.select with splatted scalar: masked vmacc.vx under "ta, mu".
define <4 x i32> @vmacc_vx_nxv4i32_ta(<4 x i32> %a, i32 %b, <4 x i32> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv4i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl)
  ret <4 x i32> %u
}
1163
; <8 x i32> variants (e32, LMUL=2). Intrinsic names retain the scalable
; 'nxv' mangling despite the fixed-vector operand types.
declare <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)
declare <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)

; vp.mul+vp.add+vp.merge(%m) folds to a masked vmacc.vv with
; tail-undisturbed, mask-undisturbed policy.
define <8 x i32> @vmacc_vv_nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl)
  ret <8 x i32> %u
}

; All-ones merge mask drops the v0.t operand; policy becomes tu, ma.
define <8 x i32> @vmacc_vv_nxv8i32_unmasked(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT:    vmacc.vv v12, v8, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> splat (i1 -1), <8 x i32> %y, <8 x i32> %c, i32 %evl)
  ret <8 x i32> %u
}

; Splatted scalar %b selects the vmacc.vx form (masked, tu/mu).
define <8 x i32> @vmacc_vx_nxv8i32(<8 x i32> %a, i32 %b, <8 x i32> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
; CHECK-NEXT:    vmacc.vx v10, a0, v8, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
  %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl)
  ret <8 x i32> %u
}

; Splatted scalar with all-ones merge mask; vmacc.vx without v0.t (tu/ma).
define <8 x i32> @vmacc_vx_nxv8i32_unmasked(<8 x i32> %a, i32 %b, <8 x i32> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
; CHECK-NEXT:    vmacc.vx v10, a0, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
  %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> splat (i1 -1), <8 x i32> %y, <8 x i32> %c, i32 %evl)
  ret <8 x i32> %u
}

; vp.select allows tail-agnostic (ta, mu); result moved with vmv.v.v.
define <8 x i32> @vmacc_vv_nxv8i32_ta(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl)
  ret <8 x i32> %u
}

; Scalar-operand, tail-agnostic vp.select case; masked vmacc.vx.
define <8 x i32> @vmacc_vx_nxv8i32_ta(<8 x i32> %a, i32 %b, <8 x i32> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv8i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
; CHECK-NEXT:    vmacc.vx v10, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
  %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl)
  ret <8 x i32> %u
}
1252
; <16 x i32> variants (e32, LMUL=4). Intrinsic names retain the scalable
; 'nxv' mangling despite the fixed-vector operand types.
declare <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
declare <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
declare <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32)

; vp.mul+vp.add+vp.merge(%m) folds to a masked vmacc.vv (tu, mu).
define <16 x i32> @vmacc_vv_nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    ret
  %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl)
  ret <16 x i32> %u
}

; All-ones merge mask drops v0.t; policy becomes tu, ma.
define <16 x i32> @vmacc_vv_nxv16i32_unmasked(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
; CHECK-NEXT:    vmacc.vv v16, v8, v12
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    ret
  %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> splat (i1 -1), <16 x i32> %y, <16 x i32> %c, i32 %evl)
  ret <16 x i32> %u
}

; Splatted scalar %b selects the vmacc.vx form (masked, tu/mu).
define <16 x i32> @vmacc_vx_nxv16i32(<16 x i32> %a, i32 %b, <16 x i32> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
; CHECK-NEXT:    vmacc.vx v12, a0, v8, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
  %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl)
  ret <16 x i32> %u
}

; Splatted scalar with all-ones merge mask; vmacc.vx without v0.t (tu/ma).
define <16 x i32> @vmacc_vx_nxv16i32_unmasked(<16 x i32> %a, i32 %b, <16 x i32> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, ma
; CHECK-NEXT:    vmacc.vx v12, a0, v8
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
  %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> splat (i1 -1), <16 x i32> %y, <16 x i32> %c, i32 %evl)
  ret <16 x i32> %u
}

; vp.select allows tail-agnostic (ta, mu); result moved with vmv.v.v.
define <16 x i32> @vmacc_vv_nxv16i32_ta(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv16i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl)
  ret <16 x i32> %u
}

; Scalar-operand, tail-agnostic vp.select case; masked vmacc.vx.
define <16 x i32> @vmacc_vx_nxv16i32_ta(<16 x i32> %a, i32 %b, <16 x i32> %c,  <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT:    vmacc.vx v12, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
  %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> splat (i1 -1), i32 %evl)
  %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> splat (i1 -1), i32 %evl)
  %u = call <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl)
  ret <16 x i32> %u
}
1341
; <2 x i64> variants (e64, LMUL=1). On RV32 the i64 scalar does not fit a
; GPR, so the .vx tests build the splat on the stack and load it with
; vlse64, falling back to vmacc.vv; RV64 keeps vmacc.vx.
declare <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
declare <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
declare <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32)
declare <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32)

; vp.mul+vp.add+vp.merge(%m) folds to a masked vmacc.vv (tu, mu).
define <2 x i64> @vmacc_vv_nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl)
  ret <2 x i64> %u
}

; All-ones merge mask drops v0.t; policy becomes tu, ma.
define <2 x i64> @vmacc_vv_nxv2i64_unmasked(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> splat (i1 -1), <2 x i64> %y, <2 x i64> %c, i32 %evl)
  ret <2 x i64> %u
}

; Masked scalar case: stack splat + vmacc.vv on RV32, vmacc.vx on RV64.
define <2 x i64> @vmacc_vx_nxv2i64(<2 x i64> %a, i64 %b, <2 x i64> %c,  <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
; RV32-NEXT:    vmacc.vv v9, v8, v10, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
; RV64-NEXT:    vmacc.vx v9, a0, v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
  %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl)
  ret <2 x i64> %u
}

; Unmasked scalar case (all-ones merge mask); tu, ma policy.
define <2 x i64> @vmacc_vx_nxv2i64_unmasked(<2 x i64> %a, i64 %b, <2 x i64> %c,  <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m1, tu, ma
; RV32-NEXT:    vmacc.vv v9, v8, v10
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, ma
; RV64-NEXT:    vmacc.vx v9, a0, v8
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
  %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> splat (i1 -1), <2 x i64> %y, <2 x i64> %c, i32 %evl)
  ret <2 x i64> %u
}

; vp.select allows tail-agnostic (ta, mu); result moved with vmv.v.v.
define <2 x i64> @vmacc_vv_nxv2i64_ta(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c,  <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i64_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl)
  ret <2 x i64> %u
}

; Scalar-operand, tail-agnostic vp.select case.
define <2 x i64> @vmacc_vx_nxv2i64_ta(<2 x i64> %a, i64 %b, <2 x i64> %c,  <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv2i64_ta:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT:    vmacc.vv v9, v8, v10, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv2i64_ta:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
; RV64-NEXT:    vmacc.vx v9, a0, v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v9
; RV64-NEXT:    ret
  %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
  %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> splat (i1 -1), i32 %evl)
  %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> splat (i1 -1), i32 %evl)
  %u = call <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl)
  ret <2 x i64> %u
}
1478
; <4 x i64> variants (e64, LMUL=2). Same RV32 stack-splat fallback for the
; .vx tests as the <2 x i64> section above.
declare <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
declare <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
declare <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32)
declare <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32)

; vp.mul+vp.add+vp.merge(%m) folds to a masked vmacc.vv (tu, mu).
define <4 x i64> @vmacc_vv_nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl)
  ret <4 x i64> %u
}

; All-ones merge mask drops v0.t; policy becomes tu, ma.
define <4 x i64> @vmacc_vv_nxv4i64_unmasked(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
; CHECK-NEXT:    vmacc.vv v12, v8, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> splat (i1 -1), <4 x i64> %y, <4 x i64> %c, i32 %evl)
  ret <4 x i64> %u
}

; Masked scalar case: stack splat + vmacc.vv on RV32, vmacc.vx on RV64.
define <4 x i64> @vmacc_vx_nxv4i64(<4 x i64> %a, i64 %b, <4 x i64> %c,  <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
; RV32-NEXT:    vmacc.vv v10, v8, v12, v0.t
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
; RV64-NEXT:    vmacc.vx v10, a0, v8, v0.t
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    ret
  %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
  %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl)
  ret <4 x i64> %u
}

; Unmasked scalar case (all-ones merge mask); tu, ma policy.
define <4 x i64> @vmacc_vx_nxv4i64_unmasked(<4 x i64> %a, i64 %b, <4 x i64> %c,  <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m2, tu, ma
; RV32-NEXT:    vmacc.vv v10, v8, v12
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m2, tu, ma
; RV64-NEXT:    vmacc.vx v10, a0, v8
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    ret
  %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
  %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> splat (i1 -1), <4 x i64> %y, <4 x i64> %c, i32 %evl)
  ret <4 x i64> %u
}

; vp.select allows tail-agnostic (ta, mu); result moved with vmv.v.v.
define <4 x i64> @vmacc_vv_nxv4i64_ta(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c,  <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i64_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl)
  ret <4 x i64> %u
}

; Scalar-operand, tail-agnostic vp.select case.
define <4 x i64> @vmacc_vx_nxv4i64_ta(<4 x i64> %a, i64 %b, <4 x i64> %c,  <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv4i64_ta:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT:    vmacc.vv v10, v8, v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv4i64_ta:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
; RV64-NEXT:    vmacc.vx v10, a0, v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
  %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> splat (i1 -1), i32 %evl)
  %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> splat (i1 -1), i32 %evl)
  %u = call <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl)
  ret <4 x i64> %u
}
1615
; <8 x i64> variants (e64, LMUL=4). Same RV32 stack-splat fallback for the
; .vx tests as the <2 x i64> section above.
declare <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
declare <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
declare <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32)
declare <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32)

; vp.mul+vp.add+vp.merge(%m) folds to a masked vmacc.vv (tu, mu).
define <8 x i64> @vmacc_vv_nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    ret
  %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl)
  ret <8 x i64> %u
}

; All-ones merge mask drops v0.t; policy becomes tu, ma.
define <8 x i64> @vmacc_vv_nxv8i64_unmasked(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
; CHECK-NEXT:    vmacc.vv v16, v8, v12
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    ret
  %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> splat (i1 -1), <8 x i64> %y, <8 x i64> %c, i32 %evl)
  ret <8 x i64> %u
}

; Masked scalar case: stack splat + vmacc.vv on RV32, vmacc.vx on RV64.
define <8 x i64> @vmacc_vx_nxv8i64(<8 x i64> %a, i64 %b, <8 x i64> %c,  <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
; RV32-NEXT:    vmacc.vv v12, v8, v16, v0.t
; RV32-NEXT:    vmv4r.v v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
; RV64-NEXT:    vmacc.vx v12, a0, v8, v0.t
; RV64-NEXT:    vmv4r.v v8, v12
; RV64-NEXT:    ret
  %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
  %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl)
  ret <8 x i64> %u
}

; Unmasked scalar case (all-ones merge mask); tu, ma policy.
define <8 x i64> @vmacc_vx_nxv8i64_unmasked(<8 x i64> %a, i64 %b, <8 x i64> %c,  <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m4, tu, ma
; RV32-NEXT:    vmacc.vv v12, v8, v16
; RV32-NEXT:    vmv4r.v v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
; RV64-NEXT:    vmacc.vx v12, a0, v8
; RV64-NEXT:    vmv4r.v v8, v12
; RV64-NEXT:    ret
  %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
  %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> splat (i1 -1), <8 x i64> %y, <8 x i64> %c, i32 %evl)
  ret <8 x i64> %u
}

; vp.select allows tail-agnostic (ta, mu); result moved with vmv.v.v.
define <8 x i64> @vmacc_vv_nxv8i64_ta(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c,  <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i64_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl)
  ret <8 x i64> %u
}

; Scalar-operand, tail-agnostic vp.select case.
define <8 x i64> @vmacc_vx_nxv8i64_ta(<8 x i64> %a, i64 %b, <8 x i64> %c,  <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv8i64_ta:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT:    vmacc.vv v12, v8, v16, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv8i64_ta:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
; RV64-NEXT:    vmacc.vx v12, a0, v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
  %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> splat (i1 -1), i32 %evl)
  %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> splat (i1 -1), i32 %evl)
  %u = call <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl)
  ret <8 x i64> %u
}
1752